// Debug-type tag string used by this file.
23 #define DEBUG_TYPE "si-mode-register"
// Statistic counter; per its description string it counts the setreg
// instructions of the mode register that were inserted.
25 STATISTIC(NumSetregInserted,
"Number of setreg of mode register inserted.");
// Keep only the bits tracked by both this status and S (Mask & S.Mask) whose
// Mode values agree: (Mode ^ ~S.Mode) has a 1 in every position where the two
// Mode bits are equal.
56 unsigned NewMask = (
Mask &
S.Mask) & (
Mode ^ ~
S.Mode);
// Retain mode values only for the surviving mask bits.
57 unsigned NewMode = (
Mode & NewMask);
// The result is a new Status built from the reduced mask and mode.
58 return Status(NewMask, NewMode);
// True when (a) every bit set in S.Mask is also set in Mask and (b) Mode,
// restricted to the bits of S.Mask, equals S.Mode.
73 return ((
Mask &
S.Mask) ==
S.Mask) && ((
Mode &
S.Mask) ==
S.Mode);
// BlockData records, each owned through a unique_ptr. The visible uses index
// this vector with values obtained from getNumber() on a block.
114 std::vector<std::unique_ptr<BlockData>> BlockInfo;
// FIFO of blocks waiting to be handed to processBlockPhase2.
115 std::queue<MachineBasicBlock *> Phase2List;
// NOTE(review): presumably records whether the pass modified the function —
// no visible line reads or writes it, so confirm against the full file.
126 bool Changed =
false;
152 "Insert required mode register values",
false,
false)
// Out-of-class definition of SIModeRegister's ID member, initialised to 0.
154 char SIModeRegister::
ID = 0;
// The instruction is examined further when TII reports that it uses FP
// double-precision rounding, or when it is one of the two FPTRUNC pseudo
// opcodes.
166 if (
TII->usesFPDPRounding(
MI) ||
167 MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
168 MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
// Dispatch on the opcode; the bodies of the cases are largely outside this
// excerpt.
169 switch (
MI.getOpcode()) {
// The three V_INTERP_*_F16 opcodes share a single case body (not visible here).
170 case AMDGPU::V_INTERP_P1LL_F16:
171 case AMDGPU::V_INTERP_P1LV_F16:
172 case AMDGPU::V_INTERP_P2_F16:
// Upward-rounding pseudo: the instruction's descriptor is replaced with that
// of V_CVT_F16_F32_e32.
176 case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
178 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_e32));
// Downward-rounding pseudo: rewritten to the same V_CVT_F16_F32_e32 descriptor.
182 case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
184 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_e32));
// NOTE(review): presumably the default case / fall-back result — the
// enclosing label is not visible in this excerpt.
189 return DefaultStatus;
// Iterate while InstrMode.Mask still has bits set. The statements that consume
// Offset/Width and update the mask are not visible in this excerpt.
202 while (InstrMode.
Mask) {
// Offset: number of trailing zero bits, i.e. the index of the lowest set bit.
203 unsigned Offset = countTrailingZeros<unsigned>(InstrMode.
Mask);
// Width: number of consecutive one bits starting at Offset.
204 unsigned Width = countTrailingOnes<unsigned>(InstrMode.
Mask >> Offset);
// Fresh BlockData record, owned by a unique_ptr, filled in by the code below.
238 auto NewInfo = std::make_unique<BlockData>();
// Starts true. The visible code clears it when a setreg immediate is merged
// into Change, or once Require and FirstInsertionPoint have been recorded.
245 bool RequirePending =
true;
// Case 1: MI is one of the four S_SETREG opcode variants.
249 if (
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
250 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
251 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
252 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
// Dst is the immediate value of the instruction's simm16 operand.
256 unsigned Dst =
TII->getNamedOperand(
MI, AMDGPU::OpName::simm16)->getImm();
// A pending insertion point is flushed first: insertSetreg is called with
// IPChange.delta(NewInfo->Change), then the insertion point is cleared.
269 if (InsertionPoint) {
270 insertSetreg(
MBB, InsertionPoint,
TII, IPChange.
delta(NewInfo->Change));
271 InsertionPoint =
nullptr;
// Immediate forms only: Val is the immediate value of the imm operand.
276 if (
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
277 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
278 unsigned Val =
TII->getNamedOperand(
MI, AMDGPU::OpName::imm)->getImm();
// NOTE(review): Setreg is constructed on lines not shown here — presumably
// from Dst/Val. It is merged into the block's accumulated Change.
283 RequirePending =
false;
284 NewInfo->Change = NewInfo->Change.merge(Setreg);
// NOTE(review): presumably the non-immediate path (the branch keyword is not
// visible); the bits in Mask are merged via mergeUnknown.
286 NewInfo->Change = NewInfo->Change.mergeUnknown(
Mask);
288 }
// Case 2: the accumulated Change is not compatible with InstrMode.
else if (!NewInfo->Change.isCompatible(InstrMode)) {
291 if (InsertionPoint) {
// Requirement still pending: record the insertion point and the current
// Change as this block's FirstInsertionPoint / Require.
296 if (RequirePending) {
300 NewInfo->FirstInsertionPoint = InsertionPoint;
301 NewInfo->Require = NewInfo->Change;
302 RequirePending =
false;
// NOTE(review): presumably the alternative to the RequirePending branch (the
// `else` is not visible): insert a setreg at the pending insertion point and
// snapshot Change into IPChange.
304 insertSetreg(
MBB, InsertionPoint,
TII,
305 IPChange.
delta(NewInfo->Change));
306 IPChange = NewInfo->Change;
// MI becomes the new insertion point.
309 InsertionPoint = &
MI;
// InstrMode is merged into the accumulated Change.
311 NewInfo->Change = NewInfo->Change.merge(InstrMode);
// NOTE(review): presumably the branch taken when no insertion point was set
// (the `else` is not visible): MI becomes the insertion point, IPChange
// snapshots Change before InstrMode is merged into it.
315 InsertionPoint = &
MI;
316 IPChange = NewInfo->Change;
317 NewInfo->Change = NewInfo->Change.
merge(InstrMode);
// Still pending here: record the insertion point and Change as the block's
// FirstInsertionPoint / Require.
321 if (RequirePending) {
324 NewInfo->FirstInsertionPoint = InsertionPoint;
325 NewInfo->Require = NewInfo->Change;
326 }
// Otherwise, flush a remaining insertion point with a setreg.
else if (InsertionPoint) {
328 insertSetreg(
MBB, InsertionPoint,
TII, IPChange.
delta(NewInfo->Change));
// The accumulated Change is stored as the block's Exit status.
330 NewInfo->Exit = NewInfo->Change;
// RevisitRequired is set below when a predecessor other than this block has no
// exit status yet. ExitSet is the value later stored into this block's ExitSet.
340 bool RevisitRequired =
false;
341 bool ExitSet =
false;
// NOTE(review): the condition guarding this assignment is not visible —
// presumably the case of a block with no predecessors.
345 BlockInfo[ThisBlock]->Pred = DefaultStatus;
// PredBlock is the number of block PB.
360 unsigned PredBlock =
PB.getNumber();
// PB is this block itself and P is the last element before E: the incoming
// status is DefaultStatus.
361 if ((ThisBlock == PredBlock) && (std::next(
P) ==
E)) {
362 BlockInfo[ThisBlock]->Pred = DefaultStatus;
364 }
// The predecessor's exit status is already set: adopt it as the incoming status.
else if (BlockInfo[PredBlock]->ExitSet) {
365 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
367 }
// A different block whose exit status is not yet set: flag a revisit.
else if (PredBlock != ThisBlock)
368 RevisitRequired =
true;
// Walk the remaining elements after P, up to E.
370 for (
P = std::next(
P);
P !=
E;
P = std::next(
P)) {
// Predecessor exit status is set: either intersect it into the current Pred or
// take it outright. NOTE(review): the `else` separating the two assignments is
// not visible in this excerpt.
373 if (BlockInfo[PredBlock]->ExitSet) {
374 if (BlockInfo[ThisBlock]->ExitSet) {
375 BlockInfo[ThisBlock]->Pred =
376 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
378 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
381 }
// Same revisit rule as for the first predecessor.
else if (PredBlock != ThisBlock)
382 RevisitRequired =
true;
// Merge this block's own Change into the incoming status. NOTE(review): the
// result is presumably what TmpStatus holds — its declaration is not visible.
386 BlockInfo[ThisBlock]->Pred.
merge(BlockInfo[ThisBlock]->Change);
// If the exit status differs from TmpStatus, store the new value and push Succ
// onto Phase2List.
387 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
388 BlockInfo[ThisBlock]->Exit = TmpStatus;
392 Phase2List.push(Succ);
394 BlockInfo[ThisBlock]->ExitSet = ExitSet;
// Re-queue this block. NOTE(review): presumably guarded by RevisitRequired —
// the guard is not visible in this excerpt.
396 Phase2List.push(&
MBB);
// Act only when the block's incoming status (Pred) is not compatible with what
// the block requires (Require).
405 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
// NOTE(review): the result of this delta() call is presumably stored in Delta —
// the declaration line is not visible.
407 BlockInfo[ThisBlock]->Pred.
delta(BlockInfo[ThisBlock]->Require);
// A setreg is inserted only if an insertion point was recorded for this block.
408 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
409 insertSetreg(
MBB, BlockInfo[ThisBlock]->FirstInsertionPoint,
TII, Delta);
// Phase 1 is run on block BB. NOTE(review): the enclosing loop headers for
// this and the other per-block calls are not visible in this excerpt.
425 processBlockPhase1(
BB,
TII);
// Phase 2: BB is queued on Phase2List, and the queue is drained by calling
// processBlockPhase2 on its front element until it is empty. The pop is not
// visible here.
431 Phase2List.push(&
BB);
432 while (!Phase2List.empty()) {
433 processBlockPhase2(*Phase2List.front(),
TII);
// Phase 3 is run on block BB.
440 processBlockPhase3(
BB,
TII);