52#define DEBUG_TYPE "amdgpu-lower-vgpr-encoding"
56class AMDGPULowerVGPREncoding {
57 static constexpr unsigned OpNum = 4;
58 static constexpr unsigned BitsPerField = 2;
59 static constexpr unsigned NumFields = 4;
60 static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;
62 std::bitset<BitsPerField * NumFields>>;
64 class ModeTy :
public ModeType {
/// Default constructor: initializes the ModeType base with the value 0.
67 ModeTy() : ModeType(0) {}
/// Implicit conversion to int64_t. The result is raw_bits().to_ulong(),
/// i.e. the underlying bit storage read as a single unsigned long.
69 operator int64_t()
const {
return raw_bits().to_ulong(); }
71 static ModeTy fullMask() {
89 bool CurrentModeKnown;
102 unsigned ClauseRemaining;
105 unsigned ClauseBreaks;
/// Convenience wrapper around setMode(): requests a default-constructed
/// ModeTy as the new mode with ModeTy::fullMask() as the mask, passing \p I
/// through unchanged. The value returned by setMode() is discarded.
114 void resetMode(
MachineInstr *
I) { setMode(ModeTy(), ModeTy::fullMask(),
I); }
127 const AMDGPU::OpName
Ops[OpNum],
128 const AMDGPU::OpName *Ops2 =
nullptr);
136bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
138 assert((NewMode.raw_bits() & ~Mask.raw_bits()).none());
140 if (CurrentModeKnown) {
141 auto Delta = NewMode.raw_bits() ^ CurrentMode.raw_bits();
143 if ((Delta & Mask.raw_bits()).none()) {
148 if (MostRecentModeSet && (Delta & CurrentMask.raw_bits()).none()) {
149 CurrentMode |= NewMode;
159 BuildMI(*
I->getParent(),
I, {},
TII->get(AMDGPU::S_SET_VGPR_MSB))
162 CurrentMode = NewMode;
164 CurrentModeKnown =
true;
168std::optional<unsigned>
175 if (!RC || !
TRI->isVGPRClass(RC))
178 unsigned Idx =
TRI->getHWRegIndex(
Reg);
182void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode, ModeTy &Mask,
184 const AMDGPU::OpName
Ops[OpNum],
185 const AMDGPU::OpName *Ops2) {
189 for (
unsigned I = 0;
I < OpNum; ++
I) {
192 std::optional<unsigned> MSBits;
194 MSBits = getMSBs(*
Op);
197 if (MSBits.has_value() && Ops2) {
198 auto Op2 =
TII->getNamedOperand(
MI, Ops2[
I]);
200 std::optional<unsigned> MSBits2;
201 MSBits2 = getMSBs(*Op2);
202 if (MSBits2.has_value() && MSBits != MSBits2)
208 if (!MSBits.has_value() && Ops2) {
209 Op =
TII->getNamedOperand(
MI, Ops2[
I]);
211 MSBits = getMSBs(*
Op);
214 if (!MSBits.has_value())
220 if (
Ops[
I] == AMDGPU::OpName::src2 && !
Op->isDef() &&
Op->isTied() &&
223 TII->hasVALU32BitEncoding(
MI.getOpcode()))))
226 NewMode[
I] = MSBits.value();
231bool AMDGPULowerVGPREncoding::runOnMachineInstr(
MachineInstr &
MI) {
234 ModeTy NewMode, Mask;
235 computeMode(NewMode, Mask,
MI,
Ops.first,
Ops.second);
236 return setMode(NewMode, Mask, &
MI);
238 assert(!
TII->hasVGPRUses(
MI) ||
MI.isMetaInstruction() ||
MI.isPseudo());
244 if (!ClauseRemaining)
249 if (ClauseRemaining == ClauseLen) {
258 Clause->eraseFromBundle();
268 Clause->getOperand(0).setImm(ClauseLen | (ClauseBreaks << 8));
277 if (!ST.has1024AddressableVGPRs())
280 TII = ST.getInstrInfo();
281 TRI = ST.getRegisterInfo();
284 ClauseLen = ClauseRemaining = 0;
287 CurrentModeKnown =
true;
288 for (
auto &
MBB : MF) {
289 MostRecentModeSet =
nullptr;
292 if (
MI.isMetaInstruction())
295 if (
MI.isTerminator() ||
MI.isCall()) {
296 if (
MI.getOpcode() == AMDGPU::S_ENDPGM ||
297 MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
299 CurrentModeKnown =
true;
305 if (
MI.isInlineAsm()) {
306 if (
TII->hasVGPRUses(
MI))
311 if (
MI.getOpcode() == AMDGPU::S_CLAUSE) {
312 assert(!ClauseRemaining &&
"Nested clauses are not supported");
313 ClauseLen =
MI.getOperand(0).getImm();
314 ClauseBreaks = (ClauseLen >> 8) & 15;
315 ClauseLen = ClauseRemaining = (ClauseLen & 63) + 1;
329 if (
Next &&
Next->pred_size() >= 2 &&
331 if (CurrentMode.raw_bits().any())
332 CurrentModeKnown =
false;
346 return AMDGPULowerVGPREncoding().run(MF);
357char AMDGPULowerVGPREncodingLegacy::ID = 0;
362 "AMDGPU Lower VGPR Encoding",
false,
false)
367 if (!AMDGPULowerVGPREncoding().run(MF))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
This file implements the PackedVector class.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Interface definition for SIInstrInfo.
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass iff it does not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.
Represents analyses that only rely on functions' control flow.
Wrapper class representing physical registers. Should be passed by value.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Store a vector of values using a specific number of bits for each value.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
static bool isVOP2(const MachineInstr &MI)
static bool isVOP3(const MCInstrDesc &Desc)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
char & AMDGPULowerVGPREncodingLegacyID