23#define DEBUG_TYPE "si-mode-register"
25STATISTIC(NumSetregInserted,
"Number of setreg of mode register inserted.");
57 unsigned NewMode = (
Mode & NewMask);
58 return Status(NewMask, NewMode);
114 std::vector<std::unique_ptr<BlockData>> BlockInfo;
115 std::queue<MachineBasicBlock *> Phase2List;
126 bool Changed =
false;
152 "Insert required mode register values",
false,
false)
154char SIModeRegister::
ID = 0;
166 unsigned Opcode =
MI.getOpcode();
167 if (
TII->usesFPDPRounding(
MI) ||
168 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
169 Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
171 case AMDGPU::V_INTERP_P1LL_F16:
172 case AMDGPU::V_INTERP_P1LV_F16:
173 case AMDGPU::V_INTERP_P2_F16:
177 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {
178 unsigned Mode =
MI.getOperand(2).getImm();
181 if (
TII->getSubtarget().hasTrue16BitInsts()) {
184 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_fake16_e64));
192 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_e32));
195 case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
196 unsigned Mode =
MI.getOperand(2).getImm();
198 MI.setDesc(
TII->get(AMDGPU::V_CVT_F32_F64_e32));
202 return DefaultStatus;
215 while (InstrMode.
Mask) {
216 unsigned Offset = llvm::countr_zero<unsigned>(InstrMode.
Mask);
217 unsigned Width = llvm::countr_one<unsigned>(InstrMode.
Mask >>
Offset);
219 using namespace AMDGPU::Hwreg;
225 InstrMode.
Mask &= ~(((1 << Width) - 1) <<
Offset);
250 auto NewInfo = std::make_unique<BlockData>();
257 bool RequirePending =
true;
261 if (
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
262 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
263 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
264 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
268 unsigned Dst =
TII->getNamedOperand(
MI, AMDGPU::OpName::simm16)->getImm();
269 using namespace AMDGPU::Hwreg;
270 auto [
Id,
Offset, Width] = HwregEncoding::decode(Dst);
274 unsigned Mask = maskTrailingOnes<unsigned>(Width) <<
Offset;
284 if (
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
285 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
286 unsigned Val =
TII->getNamedOperand(
MI, AMDGPU::OpName::imm)->getImm();
291 RequirePending =
false;
292 NewInfo->Change = NewInfo->Change.merge(Setreg);
294 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
296 }
else if (!NewInfo->Change.isCompatible(InstrMode)) {
304 if (RequirePending) {
309 NewInfo->Require = NewInfo->Change;
310 RequirePending =
false;
313 IPChange.
delta(NewInfo->Change));
314 IPChange = NewInfo->Change;
319 NewInfo->Change = NewInfo->Change.merge(InstrMode);
324 IPChange = NewInfo->Change;
325 NewInfo->Change = NewInfo->Change.
merge(InstrMode);
329 if (RequirePending) {
333 NewInfo->Require = NewInfo->Change;
338 NewInfo->Exit = NewInfo->Change;
348 bool RevisitRequired =
false;
349 bool ExitSet =
false;
353 BlockInfo[ThisBlock]->Pred = DefaultStatus;
368 unsigned PredBlock =
PB.getNumber();
369 if ((ThisBlock == PredBlock) && (std::next(
P) == E)) {
370 BlockInfo[ThisBlock]->Pred = DefaultStatus;
372 }
else if (BlockInfo[PredBlock]->ExitSet) {
373 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
375 }
else if (PredBlock != ThisBlock)
376 RevisitRequired =
true;
378 for (
P = std::next(
P);
P != E;
P = std::next(
P)) {
381 if (BlockInfo[PredBlock]->ExitSet) {
382 if (BlockInfo[ThisBlock]->ExitSet) {
383 BlockInfo[ThisBlock]->Pred =
384 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
386 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
389 }
else if (PredBlock != ThisBlock)
390 RevisitRequired =
true;
394 BlockInfo[ThisBlock]->Pred.
merge(BlockInfo[ThisBlock]->Change);
395 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
396 BlockInfo[ThisBlock]->Exit = TmpStatus;
400 Phase2List.push(Succ);
402 BlockInfo[ThisBlock]->ExitSet = ExitSet;
404 Phase2List.push(&
MBB);
413 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
415 BlockInfo[ThisBlock]->Pred.
delta(BlockInfo[ThisBlock]->Require);
416 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
417 insertSetreg(
MBB, BlockInfo[ThisBlock]->FirstInsertionPoint,
TII, Delta);
430 if (
F.hasFnAttribute(llvm::Attribute::StrictFP))
441 processBlockPhase1(BB,
TII);
447 Phase2List.push(&BB);
448 while (!Phase2List.empty()) {
449 processBlockPhase2(*Phase2List.front(),
TII);
456 processBlockPhase3(BB,
TII);
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
#define FP_ROUND_MODE_DP(x)
#define FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_ZERO
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
MachineInstr * FirstInsertionPoint
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator
pred_iterator pred_begin()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
MachineInstr & instr_front()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
LLVM Value Representation.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createSIModeRegisterPass()
Status delta(const Status &S) const
Status(unsigned NewMask, unsigned NewMode)
bool isCombinable(Status &S)
bool operator==(const Status &S) const
bool isCompatible(Status &S)
Status merge(const Status &S) const
Status intersect(const Status &S) const
bool operator!=(const Status &S) const
Status mergeUnknown(unsigned newMask)