LLVM  15.0.0git
GCNDPPCombine.cpp
Go to the documentation of this file.
1 //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
9 // operand. If any of the use instruction cannot be combined with the mov the
10 // whole sequence is reverted.
11 //
12 // $old = ...
13 // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
14 // dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15 // $res = VALU $dpp_value [, src1]
16 //
17 // to
18 //
19 // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20 // dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
21 //
22 // Combining rules :
23 //
24 // if $row_mask and $bank_mask are fully enabled (0xF) and
25 // $bound_ctrl==DPP_BOUND_ZERO or $old==0
26 // -> $combined_old = undef,
27 // $combined_bound_ctrl = DPP_BOUND_ZERO
28 //
29 // if the VALU op is binary and
30 // $bound_ctrl==DPP_BOUND_OFF and
31 // $old==identity value (immediate) for the VALU op
32 // -> $combined_old = src1,
33 // $combined_bound_ctrl = DPP_BOUND_OFF
34 //
35 // Otherwise cancel.
36 //
37 // The mov_dpp instruction should reside in the same BB as all its uses
38 //===----------------------------------------------------------------------===//
39 
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <limits>
45 
46 using namespace llvm;
47 
48 #define DEBUG_TYPE "gcn-dpp-combine"
49 
50 STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
51 
52 namespace {
53 
54 class GCNDPPCombine : public MachineFunctionPass {
56  const SIInstrInfo *TII;
57  const GCNSubtarget *ST;
58 
60 
61  MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
62 
63  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
64  RegSubRegPair CombOldVGPR,
65  MachineOperand *OldOpnd, bool CombBCZ,
66  bool IsShrinkable) const;
67 
68  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
69  RegSubRegPair CombOldVGPR, bool CombBCZ,
70  bool IsShrinkable) const;
71 
72  bool hasNoImmOrEqual(MachineInstr &MI,
73  unsigned OpndName,
74  int64_t Value,
75  int64_t Mask = -1) const;
76 
77  bool combineDPPMov(MachineInstr &MI) const;
78 
79 public:
80  static char ID;
81 
82  GCNDPPCombine() : MachineFunctionPass(ID) {
84  }
85 
86  bool runOnMachineFunction(MachineFunction &MF) override;
87 
88  StringRef getPassName() const override { return "GCN DPP Combine"; }
89 
90  void getAnalysisUsage(AnalysisUsage &AU) const override {
91  AU.setPreservesCFG();
93  }
94 
95  MachineFunctionProperties getRequiredProperties() const override {
98  }
99 
100 private:
101  int getDPPOp(unsigned Op, bool IsShrinkable) const;
102  bool isShrinkable(MachineInstr &MI) const;
103 };
104 
105 } // end anonymous namespace
106 
107 INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
108 
109 char GCNDPPCombine::ID = 0;
110 
111 char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
112 
114  return new GCNDPPCombine();
115 }
116 
117 bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
118  unsigned Op = MI.getOpcode();
119  if (!TII->isVOP3(Op)) {
120  return false;
121  }
122  if (!TII->hasVALU32BitEncoding(Op)) {
123  LLVM_DEBUG(dbgs() << " Inst hasn't e32 equivalent\n");
124  return false;
125  }
126  if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
127  // Give up if there are any uses of the carry-out from instructions like
128  // V_ADD_CO_U32. The shrunken form of the instruction would write it to vcc
129  // instead of to a virtual register.
130  if (!MRI->use_nodbg_empty(SDst->getReg()))
131  return false;
132  }
133  // check if other than abs|neg modifiers are set (opsel for example)
134  const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
135  if (!hasNoImmOrEqual(MI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
136  !hasNoImmOrEqual(MI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
137  !hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) ||
138  !hasNoImmOrEqual(MI, AMDGPU::OpName::omod, 0)) {
139  LLVM_DEBUG(dbgs() << " Inst has non-default modifiers\n");
140  return false;
141  }
142  return true;
143 }
144 
145 int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {
146  auto DPP32 = AMDGPU::getDPPOp32(Op);
147  if (IsShrinkable) {
148  assert(DPP32 == -1);
149  auto E32 = AMDGPU::getVOPe32(Op);
150  DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
151  }
152  return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
153 }
154 
155 // tracks the register operand definition and returns:
156 // 1. immediate operand used to initialize the register if found
157 // 2. nullptr if the register operand is undef
158 // 3. the operand itself otherwise
159 MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
160  auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
161  if (!Def)
162  return nullptr;
163 
164  switch(Def->getOpcode()) {
165  default: break;
166  case AMDGPU::IMPLICIT_DEF:
167  return nullptr;
168  case AMDGPU::COPY:
169  case AMDGPU::V_MOV_B32_e32:
170  case AMDGPU::V_MOV_B64_PSEUDO:
171  case AMDGPU::V_MOV_B64_e32:
172  case AMDGPU::V_MOV_B64_e64: {
173  auto &Op1 = Def->getOperand(1);
174  if (Op1.isImm())
175  return &Op1;
176  break;
177  }
178  }
179  return &OldOpnd;
180 }
181 
182 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
183  MachineInstr &MovMI,
184  RegSubRegPair CombOldVGPR,
185  bool CombBCZ,
186  bool IsShrinkable) const {
187  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
188  MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
189  MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
190 
191  auto OrigOp = OrigMI.getOpcode();
192  auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
193  if (DPPOp == -1) {
194  LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
195  return nullptr;
196  }
197 
198  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
199  OrigMI.getDebugLoc(), TII->get(DPPOp))
200  .setMIFlags(OrigMI.getFlags());
201 
202  bool Fail = false;
203  do {
204  auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
205  assert(Dst);
206  DPPInst.add(*Dst);
207  int NumOperands = 1;
208 
209  const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
210  if (OldIdx != -1) {
211  assert(OldIdx == NumOperands);
213  CombOldVGPR,
214  *MRI->getRegClass(
215  TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()),
216  *MRI));
217  auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
218  DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
219  CombOldVGPR.SubReg);
220  ++NumOperands;
221  } else {
222  // TODO: this discards MAC/FMA instructions for now, let's add it later
223  LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction,"
224  " TBD\n");
225  Fail = true;
226  break;
227  }
228 
229  if (auto *Mod0 = TII->getNamedOperand(OrigMI,
230  AMDGPU::OpName::src0_modifiers)) {
231  assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
232  AMDGPU::OpName::src0_modifiers));
233  assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
234  DPPInst.addImm(Mod0->getImm());
235  ++NumOperands;
236  } else if (AMDGPU::getNamedOperandIdx(DPPOp,
237  AMDGPU::OpName::src0_modifiers) != -1) {
238  DPPInst.addImm(0);
239  ++NumOperands;
240  }
241  auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
242  assert(Src0);
243  if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
244  LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
245  Fail = true;
246  break;
247  }
248  DPPInst.add(*Src0);
249  DPPInst->getOperand(NumOperands).setIsKill(false);
250  ++NumOperands;
251 
252  if (auto *Mod1 = TII->getNamedOperand(OrigMI,
253  AMDGPU::OpName::src1_modifiers)) {
254  assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
255  AMDGPU::OpName::src1_modifiers));
256  assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
257  DPPInst.addImm(Mod1->getImm());
258  ++NumOperands;
259  } else if (AMDGPU::getNamedOperandIdx(DPPOp,
260  AMDGPU::OpName::src1_modifiers) != -1) {
261  DPPInst.addImm(0);
262  ++NumOperands;
263  }
264  if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
265  if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
266  LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
267  Fail = true;
268  break;
269  }
270  DPPInst.add(*Src1);
271  ++NumOperands;
272  }
273 
274  if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
275  if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
276  !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
277  LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
278  Fail = true;
279  break;
280  }
281  DPPInst.add(*Src2);
282  }
283 
284  DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
285  DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
286  DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
287  DPPInst.addImm(CombBCZ ? 1 : 0);
288  } while (false);
289 
290  if (Fail) {
291  DPPInst.getInstr()->eraseFromParent();
292  return nullptr;
293  }
294  LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
295  return DPPInst.getInstr();
296 }
297 
298 static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
299  assert(OldOpnd->isImm());
300  switch (OrigMIOp) {
301  default: break;
302  case AMDGPU::V_ADD_U32_e32:
303  case AMDGPU::V_ADD_U32_e64:
304  case AMDGPU::V_ADD_CO_U32_e32:
305  case AMDGPU::V_ADD_CO_U32_e64:
306  case AMDGPU::V_OR_B32_e32:
307  case AMDGPU::V_OR_B32_e64:
308  case AMDGPU::V_SUBREV_U32_e32:
309  case AMDGPU::V_SUBREV_U32_e64:
310  case AMDGPU::V_SUBREV_CO_U32_e32:
311  case AMDGPU::V_SUBREV_CO_U32_e64:
312  case AMDGPU::V_MAX_U32_e32:
313  case AMDGPU::V_MAX_U32_e64:
314  case AMDGPU::V_XOR_B32_e32:
315  case AMDGPU::V_XOR_B32_e64:
316  if (OldOpnd->getImm() == 0)
317  return true;
318  break;
319  case AMDGPU::V_AND_B32_e32:
320  case AMDGPU::V_AND_B32_e64:
321  case AMDGPU::V_MIN_U32_e32:
322  case AMDGPU::V_MIN_U32_e64:
323  if (static_cast<uint32_t>(OldOpnd->getImm()) ==
325  return true;
326  break;
327  case AMDGPU::V_MIN_I32_e32:
328  case AMDGPU::V_MIN_I32_e64:
329  if (static_cast<int32_t>(OldOpnd->getImm()) ==
331  return true;
332  break;
333  case AMDGPU::V_MAX_I32_e32:
334  case AMDGPU::V_MAX_I32_e64:
335  if (static_cast<int32_t>(OldOpnd->getImm()) ==
337  return true;
338  break;
339  case AMDGPU::V_MUL_I32_I24_e32:
340  case AMDGPU::V_MUL_I32_I24_e64:
341  case AMDGPU::V_MUL_U32_U24_e32:
342  case AMDGPU::V_MUL_U32_U24_e64:
343  if (OldOpnd->getImm() == 1)
344  return true;
345  break;
346  }
347  return false;
348 }
349 
350 MachineInstr *GCNDPPCombine::createDPPInst(
351  MachineInstr &OrigMI, MachineInstr &MovMI, RegSubRegPair CombOldVGPR,
352  MachineOperand *OldOpndValue, bool CombBCZ, bool IsShrinkable) const {
353  assert(CombOldVGPR.Reg);
354  if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
355  auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
356  if (!Src1 || !Src1->isReg()) {
357  LLVM_DEBUG(dbgs() << " failed: no src1 or it isn't a register\n");
358  return nullptr;
359  }
360  if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
361  LLVM_DEBUG(dbgs() << " failed: old immediate isn't an identity\n");
362  return nullptr;
363  }
364  CombOldVGPR = getRegSubRegPair(*Src1);
365  auto MovDst = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
366  const TargetRegisterClass *RC = MRI->getRegClass(MovDst->getReg());
367  if (!isOfRegClass(CombOldVGPR, *RC, *MRI)) {
368  LLVM_DEBUG(dbgs() << " failed: src1 has wrong register class\n");
369  return nullptr;
370  }
371  }
372  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable);
373 }
374 
375 // returns true if MI doesn't have OpndName immediate operand or the
376 // operand has Value
377 bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
378  int64_t Value, int64_t Mask) const {
379  auto *Imm = TII->getNamedOperand(MI, OpndName);
380  if (!Imm)
381  return true;
382 
383  assert(Imm->isImm());
384  return (Imm->getImm() & Mask) == Value;
385 }
386 
387 bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
388  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
389  MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
390  MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
391  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
392 
393  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
394  assert(DstOpnd && DstOpnd->isReg());
395  auto DPPMovReg = DstOpnd->getReg();
396  if (DPPMovReg.isPhysical()) {
397  LLVM_DEBUG(dbgs() << " failed: dpp move writes physreg\n");
398  return false;
399  }
400  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
401  LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
402  " for all uses\n");
403  return false;
404  }
405 
406  if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
407  MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
408  auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
409  assert(DppCtrl && DppCtrl->isImm());
410  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) {
411  LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move uses unsupported"
412  " control value\n");
413  // Let it split, then control may become legal.
414  return false;
415  }
416  }
417 
418  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
419  assert(RowMaskOpnd && RowMaskOpnd->isImm());
420  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
421  assert(BankMaskOpnd && BankMaskOpnd->isImm());
422  const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
423  BankMaskOpnd->getImm() == 0xF;
424 
425  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
426  assert(BCZOpnd && BCZOpnd->isImm());
427  bool BoundCtrlZero = BCZOpnd->getImm();
428 
429  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
430  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
431  assert(OldOpnd && OldOpnd->isReg());
432  assert(SrcOpnd && SrcOpnd->isReg());
433  if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
434  LLVM_DEBUG(dbgs() << " failed: dpp move reads physreg\n");
435  return false;
436  }
437 
438  auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
439  // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
440  // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
441  // but the third option is used to distinguish undef from non-immediate
442  // to reuse IMPLICIT_DEF instruction later
443  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
444 
445  bool CombBCZ = false;
446 
447  if (MaskAllLanes && BoundCtrlZero) { // [1]
448  CombBCZ = true;
449  } else {
450  if (!OldOpndValue || !OldOpndValue->isImm()) {
451  LLVM_DEBUG(dbgs() << " failed: the DPP mov isn't combinable\n");
452  return false;
453  }
454 
455  if (OldOpndValue->getImm() == 0) {
456  if (MaskAllLanes) {
457  assert(!BoundCtrlZero); // by check [1]
458  CombBCZ = true;
459  }
460  } else if (BoundCtrlZero) {
461  assert(!MaskAllLanes); // by check [1]
462  LLVM_DEBUG(dbgs() <<
463  " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
464  return false;
465  }
466  }
467 
468  LLVM_DEBUG(dbgs() << " old=";
469  if (!OldOpndValue)
470  dbgs() << "undef";
471  else
472  dbgs() << *OldOpndValue;
473  dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
474 
475  SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
477  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
478  // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
479  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
480  const TargetRegisterClass *RC = MRI->getRegClass(DPPMovReg);
481  CombOldVGPR = RegSubRegPair(
483  auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
484  TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
485  DPPMIs.push_back(UndefInst.getInstr());
486  }
487 
488  OrigMIs.push_back(&MovMI);
489  bool Rollback = true;
491 
492  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
493  Uses.push_back(&Use);
494  }
495 
496  while (!Uses.empty()) {
497  MachineOperand *Use = Uses.pop_back_val();
498  Rollback = true;
499 
500  auto &OrigMI = *Use->getParent();
501  LLVM_DEBUG(dbgs() << " try: " << OrigMI);
502 
503  auto OrigOp = OrigMI.getOpcode();
504  if (OrigOp == AMDGPU::REG_SEQUENCE) {
505  Register FwdReg = OrigMI.getOperand(0).getReg();
506  unsigned FwdSubReg = 0;
507 
508  if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
509  LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
510  " for all uses\n");
511  break;
512  }
513 
514  unsigned OpNo, E = OrigMI.getNumOperands();
515  for (OpNo = 1; OpNo < E; OpNo += 2) {
516  if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
517  FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
518  break;
519  }
520  }
521 
522  if (!FwdSubReg)
523  break;
524 
525  for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
526  if (Op.getSubReg() == FwdSubReg)
527  Uses.push_back(&Op);
528  }
529  RegSeqWithOpNos[&OrigMI].push_back(OpNo);
530  continue;
531  }
532 
533  bool IsShrinkable = isShrinkable(OrigMI);
534  if (!(IsShrinkable || TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
535  LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
536  break;
537  }
538 
539  auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
540  auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
541  if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
542  LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
543  break;
544  }
545 
546  assert(Src0 && "Src1 without Src0?");
547  if (Src1 && Src1->isIdenticalTo(*Src0)) {
548  assert(Src1->isReg());
549  LLVM_DEBUG(
550  dbgs()
551  << " " << OrigMI
552  << " failed: DPP register is used more than once per instruction\n");
553  break;
554  }
555 
556  LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
557  if (Use == Src0) {
558  if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
559  OldOpndValue, CombBCZ, IsShrinkable)) {
560  DPPMIs.push_back(DPPInst);
561  Rollback = false;
562  }
563  } else {
564  assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
565  auto *BB = OrigMI.getParent();
566  auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
567  BB->insert(OrigMI, NewMI);
568  if (TII->commuteInstruction(*NewMI)) {
569  LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
570  if (auto *DPPInst =
571  createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ,
572  IsShrinkable)) {
573  DPPMIs.push_back(DPPInst);
574  Rollback = false;
575  }
576  } else
577  LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
578  NewMI->eraseFromParent();
579  }
580  if (Rollback)
581  break;
582  OrigMIs.push_back(&OrigMI);
583  }
584 
585  Rollback |= !Uses.empty();
586 
587  for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
588  MI->eraseFromParent();
589 
590  if (!Rollback) {
591  for (auto &S : RegSeqWithOpNos) {
592  if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
593  S.first->eraseFromParent();
594  continue;
595  }
596  while (!S.second.empty())
597  S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
598  }
599  }
600 
601  return !Rollback;
602 }
603 
604 bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
605  ST = &MF.getSubtarget<GCNSubtarget>();
606  if (!ST->hasDPP() || skipFunction(MF.getFunction()))
607  return false;
608 
609  MRI = &MF.getRegInfo();
610  TII = ST->getInstrInfo();
611 
612  bool Changed = false;
613  for (auto &MBB : MF) {
615  if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
616  Changed = true;
617  ++NumDPPMovsCombined;
618  } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
619  MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
620  if (ST->has64BitDPP() && combineDPPMov(MI)) {
621  Changed = true;
622  ++NumDPPMovsCombined;
623  } else {
624  auto Split = TII->expandMovDPP64(MI);
625  for (auto M : { Split.first, Split.second }) {
626  if (M && combineDPPMov(*M))
627  ++NumDPPMovsCombined;
628  }
629  Changed = true;
630  }
631  }
632  }
633  }
634  return Changed;
635 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
llvm::getRegSubRegPair
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
Definition: SIInstrInfo.h:1192
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::SmallVector< MachineInstr *, 4 >
Statistic.h
llvm::AMDGPU::isLegal64BitDPPControl
LLVM_READNONE bool isLegal64BitDPPControl(unsigned DC)
Definition: AMDGPUBaseInfo.h:979
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
Fail
#define Fail
Definition: AArch64Disassembler.cpp:281
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::MachineFunctionProperties::Property::IsSSA
@ IsSSA
llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:127
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:216
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:103
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::initializeGCNDPPCombinePass
void initializeGCNDPPCombinePass(PassRegistry &)
llvm::TargetInstrInfo::RegSubRegPair
A pair composed of a register and a sub-register index.
Definition: TargetInstrInfo.h:491
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:585
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineInstr::getFlags
uint16_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:327
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:666
llvm::MachineRegisterInfo::use_nodbg_operands
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:534
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:501
old
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n old
Definition: README.txt:123
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::AMDGPU::getDPPOp32
LLVM_READONLY int getDPPOp32(uint16_t Opcode)
llvm::AMDGPU::getVOPe32
LLVM_READONLY int getVOPe32(uint16_t Opcode)
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::MachineFunctionProperties::set
MachineFunctionProperties & set(Property P)
Definition: MachineFunction.h:196
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::getVRegSubRegDef
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
Definition: SIInstrInfo.cpp:7981
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:420
AMDGPUMCTargetDesc.h
llvm::TargetInstrInfo::RegSubRegPair::Reg
Register Reg
Definition: TargetInstrInfo.h:492
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::execMayBeModifiedBeforeAnyUse
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
Definition: SIInstrInfo.cpp:8049
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::MachineInstr::isCommutable
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MachineInstr.h:1064
llvm::AMDGPU::DPP::DppCtrl
DppCtrl
Definition: SIDefines.h:798
llvm::DenseMap
Definition: DenseMap.h:716
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:618
MachineFunctionPass.h
llvm::GCNDPPCombineID
char & GCNDPPCombineID
Definition: GCNDPPCombine.cpp:111
RegSubRegPair
TargetInstrInfo::RegSubRegPair RegSubRegPair
Definition: PeepholeOptimizer.cpp:100
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
isIdentityValue
static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd)
Definition: GCNDPPCombine.cpp:298
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineRegisterInfo::use_nodbg_empty
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
Definition: MachineRegisterInfo.h:574
llvm::TargetInstrInfo::RegSubRegPair::SubReg
unsigned SubReg
Definition: TargetInstrInfo.h:493
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:491
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:288
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::createGCNDPPCombinePass
FunctionPass * createGCNDPPCombinePass()
Definition: GCNDPPCombine.cpp:113
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::SISrcMods::ABS
@ ABS
Definition: SIDefines.h:217
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::misexpect::clamp
uint32_t clamp(uint64_t value, uint32_t low, uint32_t hi)
Definition: MisExpect.cpp:150
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:322
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
llvm::MachineInstrBuilder::setMIFlags
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Definition: MachineInstrBuilder.h:273
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:494
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
DEBUG_TYPE
#define DEBUG_TYPE
Definition: GCNDPPCombine.cpp:48
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::isOfRegClass
bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, const TargetRegisterClass &TRC, MachineRegisterInfo &MRI)
Returns true if a reg:subreg pair P has a TRC class.
Definition: SIInstrInfo.h:1180
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52