//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// The pass combines the V_MOV_B32_dpp instruction with its VALU uses as a DPP
// src0 operand. If any of the use instructions cannot be combined with the
// mov, the whole sequence is reverted.
//
// $old = ...
// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
//              dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
// $res = VALU $dpp_value [, src1]
//
// to
//
// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
//        dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
//
// Combining rules:
//
// if $row_mask and $bank_mask are fully enabled (0xF) and
//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
// -> $combined_old = undef,
//    $combined_bound_ctrl = DPP_BOUND_ZERO
//
// if the VALU op is binary and
//    $bound_ctrl==DPP_BOUND_OFF and
//    $old==identity value (immediate) for the VALU op
// -> $combined_old = src1,
//    $combined_bound_ctrl = DPP_BOUND_OFF
//
// Otherwise cancel.
//
// The mov_dpp instruction should reside in the same BB as all its uses.
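//
// For instance, a sketch of the first rule (virtual register names and the
// dpp control are illustrative; source-modifier operands are omitted):
//
//   $old = IMPLICIT_DEF
//   $dpp_value = V_MOV_B32_dpp $old, $other_lane_vgpr, row_shl:1,
//                row_mask:0xF, bank_mask:0xF, bound_ctrl:1
//   $res = V_ADD_U32_e32 $dpp_value, $src1
//
// becomes the single instruction
//
//   $res = V_ADD_U32_dpp undef $old, $other_lane_vgpr, $src1, row_shl:1,
//          row_mask:0xF, bank_mask:0xF, bound_ctrl:1
//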
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Pass.h"
#include <cassert>

using namespace llvm;

#define DEBUG_TYPE "gcn-dpp-combine"

STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");

namespace {

class GCNDPPCombine : public MachineFunctionPass {
  MachineRegisterInfo *MRI;
  const SIInstrInfo *TII;

  using RegSubRegPair = TargetInstrInfo::RegSubRegPair;

  MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;

  MachineInstr *createDPPInst(MachineInstr &OrigMI,
                              MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              MachineOperand *OldOpnd,
                              bool CombBCZ) const;

  MachineInstr *createDPPInst(MachineInstr &OrigMI,
                              MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              bool CombBCZ) const;

  bool hasNoImmOrEqual(MachineInstr &MI,
                       unsigned OpndName,
                       int64_t Value,
                       int64_t Mask = -1) const;

  bool combineDPPMov(MachineInstr &MI) const;

public:
  static char ID;

  GCNDPPCombine() : MachineFunctionPass(ID) {
    initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN DPP Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)

char GCNDPPCombine::ID = 0;

char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;

FunctionPass *llvm::createGCNDPPCombinePass() {
  return new GCNDPPCombine();
}

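// Returns the DPP form of the given VALU opcode, falling back to the VOP
// 32-bit encoding of the opcode when the opcode itself has no direct DPP
// variant; returns -1 if no DPP form exists.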
static int getDPPOp(unsigned Op) {
  auto DPP32 = AMDGPU::getDPPOp32(Op);
  if (DPP32 != -1)
    return DPP32;

  auto E32 = AMDGPU::getVOPe32(Op);
  return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1;
}

// Tracks the register operand's definition and returns:
// 1. the immediate operand used to initialize the register, if found
// 2. nullptr if the register operand is undef
// 3. the operand itself otherwise
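// For example (illustrative virtual registers): an old operand defined by
//   %old:vgpr_32 = V_MOV_B32_e32 0
// yields the immediate operand 0 (case 1), while an IMPLICIT_DEF definition
// yields nullptr (case 2).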
MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
  auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
  if (!Def)
    return nullptr;

  switch(Def->getOpcode()) {
  default: break;
  case AMDGPU::IMPLICIT_DEF:
    return nullptr;
  case AMDGPU::COPY:
  case AMDGPU::V_MOV_B32_e32: {
    auto &Op1 = Def->getOperand(1);
    if (Op1.isImm())
      return &Op1;
    break;
  }
  }
  return &OldOpnd;
}

MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           bool CombBCZ) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);

  auto OrigOp = OrigMI.getOpcode();
  auto DPPOp = getDPPOp(OrigOp);
  if (DPPOp == -1) {
    LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
    return nullptr;
  }

  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
                         OrigMI.getDebugLoc(), TII->get(DPPOp));
  bool Fail = false;
  do {
    auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
    assert(Dst);
    DPPInst.add(*Dst);
    int NumOperands = 1;

    const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
    if (OldIdx != -1) {
      assert(OldIdx == NumOperands);
      assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
      auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
      DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
                     CombOldVGPR.SubReg);
      ++NumOperands;
    } else {
      // TODO: this discards MAC/FMA instructions for now, let's add it later
      LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction,"
                           " TBD\n");
      Fail = true;
      break;
    }

    if (auto *Mod0 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src0_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
                                          AMDGPU::OpName::src0_modifiers));
      assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
      DPPInst.addImm(Mod0->getImm());
      ++NumOperands;
    } else if (AMDGPU::getNamedOperandIdx(DPPOp,
                                          AMDGPU::OpName::src0_modifiers) != -1) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
    assert(Src0);
    if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
      LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
      Fail = true;
      break;
    }
    DPPInst.add(*Src0);
    DPPInst->getOperand(NumOperands).setIsKill(false);
    ++NumOperands;

    if (auto *Mod1 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src1_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
                                          AMDGPU::OpName::src1_modifiers));
      assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
      DPPInst.addImm(Mod1->getImm());
      ++NumOperands;
    } else if (AMDGPU::getNamedOperandIdx(DPPOp,
                                          AMDGPU::OpName::src1_modifiers) != -1) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
      if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
        LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src1);
      ++NumOperands;
    }

    if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
      if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
        LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src2);
    }

    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
    DPPInst.addImm(CombBCZ ? 1 : 0);
  } while (false);

  if (Fail) {
    DPPInst.getInstr()->eraseFromParent();
    return nullptr;
  }
  LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
  return DPPInst.getInstr();
}

static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
  assert(OldOpnd->isImm());
  switch (OrigMIOp) {
  default: break;
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_I32_e32:
  case AMDGPU::V_ADD_I32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32:
  case AMDGPU::V_SUBREV_I32_e64:
  case AMDGPU::V_MAX_U32_e32:
  case AMDGPU::V_MAX_U32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::V_XOR_B32_e64:
    if (OldOpnd->getImm() == 0)
      return true;
    break;
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_MIN_U32_e32:
  case AMDGPU::V_MIN_U32_e64:
    if (static_cast<uint32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<uint32_t>::max())
      return true;
    break;
  case AMDGPU::V_MIN_I32_e32:
  case AMDGPU::V_MIN_I32_e64:
    if (static_cast<int32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<int32_t>::max())
      return true;
    break;
  case AMDGPU::V_MAX_I32_e32:
  case AMDGPU::V_MAX_I32_e64:
    if (static_cast<int32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<int32_t>::min())
      return true;
    break;
  case AMDGPU::V_MUL_I32_I24_e32:
  case AMDGPU::V_MUL_I32_I24_e64:
  case AMDGPU::V_MUL_U32_U24_e32:
  case AMDGPU::V_MUL_U32_U24_e64:
    if (OldOpnd->getImm() == 1)
      return true;
    break;
  }
  return false;
}

MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           MachineOperand *OldOpndValue,
                                           bool CombBCZ) const {
  assert(CombOldVGPR.Reg);
  if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (!Src1 || !Src1->isReg()) {
      LLVM_DEBUG(dbgs() << " failed: no src1 or it isn't a register\n");
      return nullptr;
    }
    if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
      LLVM_DEBUG(dbgs() << " failed: old immediate isn't an identity\n");
      return nullptr;
    }
    CombOldVGPR = getRegSubRegPair(*Src1);
    if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
      LLVM_DEBUG(dbgs() << " failed: src1 isn't a VGPR32 register\n");
      return nullptr;
    }
  }
  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
}

// Returns true if MI doesn't have the OpndName immediate operand, or if that
// operand's value (under Mask) equals Value.
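// For example, hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) holds when MI
// either has no clamp operand or its clamp operand is 0.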
bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
                                    int64_t Value, int64_t Mask) const {
  auto *Imm = TII->getNamedOperand(MI, OpndName);
  if (!Imm)
    return true;

  assert(Imm->isImm());
  return (Imm->getImm() & Mask) == Value;
}

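// Tries to fold MovMI (a V_MOV_B32_dpp) into every VALU use of its result.
// Returns true iff all uses were rewritten to DPP instructions; otherwise all
// instructions created along the way are erased and nothing is changed.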
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);

  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
  assert(DstOpnd && DstOpnd->isReg());
  auto DPPMovReg = DstOpnd->getReg();
  if (DPPMovReg.isPhysical()) {
    LLVM_DEBUG(dbgs() << " failed: dpp move writes physreg\n");
    return false;
  }
  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
    LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
                         " for all uses\n");
    return false;
  }

  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
  assert(RowMaskOpnd && RowMaskOpnd->isImm());
  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
  assert(BankMaskOpnd && BankMaskOpnd->isImm());
  const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
                            BankMaskOpnd->getImm() == 0xF;

  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
  assert(BCZOpnd && BCZOpnd->isImm());
  bool BoundCtrlZero = BCZOpnd->getImm();

  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
  assert(OldOpnd && OldOpnd->isReg());
  assert(SrcOpnd && SrcOpnd->isReg());
  if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
    LLVM_DEBUG(dbgs() << " failed: dpp move reads physreg\n");
    return false;
  }

  auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
  // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
  // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
  // but the third option is used to distinguish undef from non-immediate
  // to reuse IMPLICIT_DEF instruction later
  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);

  bool CombBCZ = false;

  if (MaskAllLanes && BoundCtrlZero) { // [1]
    CombBCZ = true;
  } else {
    if (!OldOpndValue || !OldOpndValue->isImm()) {
      LLVM_DEBUG(dbgs() << " failed: the DPP mov isn't combinable\n");
      return false;
    }

    if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
      LLVM_DEBUG(dbgs() <<
        " failed: old reg def and mov should be in the same BB\n");
      return false;
    }

    if (OldOpndValue->getImm() == 0) {
      if (MaskAllLanes) {
        assert(!BoundCtrlZero); // by check [1]
        CombBCZ = true;
      }
    } else if (BoundCtrlZero) {
      assert(!MaskAllLanes); // by check [1]
      LLVM_DEBUG(dbgs() <<
        " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << " old=";
    if (!OldOpndValue)
      dbgs() << "undef";
    else
      dbgs() << *OldOpndValue;
    dbgs() << ", bound_ctrl=" << CombBCZ << '\n');

  SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
  DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
  // try to reuse previous old reg if it's undefined (IMPLICIT_DEF)
  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
    CombOldVGPR = RegSubRegPair(
      MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
    auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
                             TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
    DPPMIs.push_back(UndefInst.getInstr());
  }

  OrigMIs.push_back(&MovMI);
  bool Rollback = true;
  SmallVector<MachineOperand*, 16> Uses;

  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
    Uses.push_back(&Use);
  }

  while (!Uses.empty()) {
    MachineOperand *Use = Uses.pop_back_val();
    Rollback = true;

    auto &OrigMI = *Use->getParent();
    LLVM_DEBUG(dbgs() << " try: " << OrigMI);

    auto OrigOp = OrigMI.getOpcode();
    if (OrigOp == AMDGPU::REG_SEQUENCE) {
      Register FwdReg = OrigMI.getOperand(0).getReg();
      unsigned FwdSubReg = 0;

      if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
        LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
                             " for all uses\n");
        break;
      }

      unsigned OpNo, E = OrigMI.getNumOperands();
      for (OpNo = 1; OpNo < E; OpNo += 2) {
        if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
          FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
          break;
        }
      }

      if (!FwdSubReg)
        break;

      for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
        if (Op.getSubReg() == FwdSubReg)
          Uses.push_back(&Op);
      }
      RegSeqWithOpNos[&OrigMI].push_back(OpNo);
      continue;
    }

    if (TII->isVOP3(OrigOp)) {
      if (!TII->hasVALU32BitEncoding(OrigOp)) {
        LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n");
        break;
      }
      // check if other than abs|neg modifiers are set (opsel for example)
      const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
      if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
        LLVM_DEBUG(dbgs() << " failed: VOP3 has non-default modifiers\n");
        break;
      }
    } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
      LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
      break;
    }

    LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
    if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
                                        OldOpndValue, CombBCZ)) {
        DPPMIs.push_back(DPPInst);
        Rollback = false;
      }
    } else if (OrigMI.isCommutable() &&
               Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
      auto *BB = OrigMI.getParent();
      auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
      BB->insert(OrigMI, NewMI);
      if (TII->commuteInstruction(*NewMI)) {
        LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
        if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
                                          OldOpndValue, CombBCZ)) {
          DPPMIs.push_back(DPPInst);
          Rollback = false;
        }
      } else
        LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
      NewMI->eraseFromParent();
    } else
      LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
    if (Rollback)
      break;
    OrigMIs.push_back(&OrigMI);
  }

  Rollback |= !Uses.empty();

  for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
    MI->eraseFromParent();

  if (!Rollback) {
    for (auto &S : RegSeqWithOpNos) {
      if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
        S.first->eraseFromParent();
        continue;
      }
      while (!S.second.empty())
        S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
    }
  }

  return !Rollback;
}

bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
  auto &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.hasDPP() || skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TII = ST.getInstrInfo();

  assert(MRI->isSSA() && "Must be run on SSA");

  bool Changed = false;
  for (auto &MBB : MF) {
    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
      auto &MI = *I++;
      if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
        Changed = true;
        ++NumDPPMovsCombined;
      } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
        auto Split = TII->expandMovDPP64(MI);
        for (auto M : { Split.first, Split.second }) {
          if (combineDPPMov(*M))
            ++NumDPPMovsCombined;
        }
        Changed = true;
      }
    }
  }
  return Changed;
}