23#define DEBUG_TYPE "si-mode-register"
25STATISTIC(NumSetregInserted,
"Number of setreg of mode register inserted.");
57 unsigned NewMode = (
Mode & NewMask);
58 return Status(NewMask, NewMode);
114 std::vector<std::unique_ptr<BlockData>> BlockInfo;
115 std::queue<MachineBasicBlock *> Phase2List;
126 bool Changed =
false;
152 "Insert required mode register values",
false,
false)
154char SIModeRegister::
ID = 0;
166 unsigned Opcode =
MI.getOpcode();
167 if (
TII->usesFPDPRounding(
MI) ||
168 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO) {
170 case AMDGPU::V_INTERP_P1LL_F16:
171 case AMDGPU::V_INTERP_P1LV_F16:
172 case AMDGPU::V_INTERP_P2_F16:
176 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {
177 unsigned Mode =
MI.getOperand(2).getImm();
180 if (
TII->getSubtarget().hasTrue16BitInsts()) {
183 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
191 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_e32));
196 return DefaultStatus;
209 while (InstrMode.
Mask) {
210 unsigned Offset = llvm::countr_zero<unsigned>(InstrMode.
Mask);
211 unsigned Width = llvm::countr_one<unsigned>(InstrMode.
Mask >>
Offset);
213 using namespace AMDGPU::Hwreg;
219 InstrMode.
Mask &= ~(((1 << Width) - 1) <<
Offset);
244 auto NewInfo = std::make_unique<BlockData>();
251 bool RequirePending =
true;
255 if (
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
256 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
257 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
258 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
262 unsigned Dst =
TII->getNamedOperand(
MI, AMDGPU::OpName::simm16)->getImm();
263 using namespace AMDGPU::Hwreg;
264 auto [
Id,
Offset, Width] = HwregEncoding::decode(Dst);
268 unsigned Mask = maskTrailingOnes<unsigned>(Width) <<
Offset;
271 if (InsertionPoint) {
272 insertSetreg(
MBB, InsertionPoint,
TII, IPChange.
delta(NewInfo->Change));
273 InsertionPoint =
nullptr;
278 if (
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
279 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
280 unsigned Val =
TII->getNamedOperand(
MI, AMDGPU::OpName::imm)->getImm();
285 RequirePending =
false;
286 NewInfo->Change = NewInfo->Change.merge(Setreg);
288 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
290 }
else if (!NewInfo->Change.isCompatible(InstrMode)) {
293 if (InsertionPoint) {
298 if (RequirePending) {
302 NewInfo->FirstInsertionPoint = InsertionPoint;
303 NewInfo->Require = NewInfo->Change;
304 RequirePending =
false;
306 insertSetreg(
MBB, InsertionPoint,
TII,
307 IPChange.
delta(NewInfo->Change));
308 IPChange = NewInfo->Change;
311 InsertionPoint = &
MI;
313 NewInfo->Change = NewInfo->Change.merge(InstrMode);
317 InsertionPoint = &
MI;
318 IPChange = NewInfo->Change;
319 NewInfo->Change = NewInfo->Change.
merge(InstrMode);
323 if (RequirePending) {
326 NewInfo->FirstInsertionPoint = InsertionPoint;
327 NewInfo->Require = NewInfo->Change;
328 }
else if (InsertionPoint) {
330 insertSetreg(
MBB, InsertionPoint,
TII, IPChange.
delta(NewInfo->Change));
332 NewInfo->Exit = NewInfo->Change;
342 bool RevisitRequired =
false;
343 bool ExitSet =
false;
347 BlockInfo[ThisBlock]->Pred = DefaultStatus;
362 unsigned PredBlock =
PB.getNumber();
363 if ((ThisBlock == PredBlock) && (std::next(
P) == E)) {
364 BlockInfo[ThisBlock]->Pred = DefaultStatus;
366 }
else if (BlockInfo[PredBlock]->ExitSet) {
367 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
369 }
else if (PredBlock != ThisBlock)
370 RevisitRequired =
true;
372 for (
P = std::next(
P);
P != E;
P = std::next(
P)) {
375 if (BlockInfo[PredBlock]->ExitSet) {
376 if (BlockInfo[ThisBlock]->ExitSet) {
377 BlockInfo[ThisBlock]->Pred =
378 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
380 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
383 }
else if (PredBlock != ThisBlock)
384 RevisitRequired =
true;
388 BlockInfo[ThisBlock]->Pred.
merge(BlockInfo[ThisBlock]->Change);
389 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
390 BlockInfo[ThisBlock]->Exit = TmpStatus;
394 Phase2List.push(Succ);
396 BlockInfo[ThisBlock]->ExitSet = ExitSet;
398 Phase2List.push(&
MBB);
407 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
409 BlockInfo[ThisBlock]->Pred.
delta(BlockInfo[ThisBlock]->Require);
410 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
411 insertSetreg(
MBB, BlockInfo[ThisBlock]->FirstInsertionPoint,
TII, Delta);
424 if (
F.hasFnAttribute(llvm::Attribute::StrictFP))
435 processBlockPhase1(BB,
TII);
441 Phase2List.push(&BB);
442 while (!Phase2List.empty()) {
443 processBlockPhase2(*Phase2List.front(),
TII);
450 processBlockPhase3(BB,
TII);
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
#define FP_ROUND_MODE_DP(x)
#define FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_ZERO
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
MachineInstr * FirstInsertionPoint
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator
pred_iterator pred_begin()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
MachineInstr & instr_front()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
LLVM Value Representation.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createSIModeRegisterPass()
Status delta(const Status &S) const
Status(unsigned NewMask, unsigned NewMode)
bool isCombinable(Status &S)
bool operator==(const Status &S) const
bool isCompatible(Status &S)
Status merge(const Status &S) const
Status intersect(const Status &S) const
bool operator!=(const Status &S) const
Status mergeUnknown(unsigned newMask)