// Source recovered from Doxygen page: LLVM 7.0.0svn — SIPeepholeSDWA.cpp
1 //===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file This pass tries to apply several peephole SDWA patterns.
11 ///
12 /// E.g. original:
13 /// V_LSHRREV_B32_e32 %0, 16, %1
14 /// V_ADD_I32_e32 %2, %0, %3
15 /// V_LSHLREV_B32_e32 %4, 16, %2
16 ///
17 /// Replace:
18 /// V_ADD_I32_sdwa %4, %1, %3
19 /// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
20 ///
21 //===----------------------------------------------------------------------===//
22 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <unordered_map>
52 
53 using namespace llvm;
54 
55 #define DEBUG_TYPE "si-peephole-sdwa"
56 
57 STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
58 STATISTIC(NumSDWAInstructionsPeepholed,
59  "Number of instruction converted to SDWA.");
60 
61 namespace {
62 
63 class SDWAOperand;
64 class SDWADstOperand;
65 
66 class SIPeepholeSDWA : public MachineFunctionPass {
67 public:
68  using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;
69 
70 private:
72  const SIRegisterInfo *TRI;
73  const SIInstrInfo *TII;
74 
75  std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
76  std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
77  SmallVector<MachineInstr *, 8> ConvertedInstructions;
78 
79  Optional<int64_t> foldToImm(const MachineOperand &Op) const;
80 
81 public:
82  static char ID;
83 
84  SIPeepholeSDWA() : MachineFunctionPass(ID) {
86  }
87 
88  bool runOnMachineFunction(MachineFunction &MF) override;
89  void matchSDWAOperands(MachineBasicBlock &MBB);
90  std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
91  bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const;
92  bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
93  void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const;
94 
95  StringRef getPassName() const override { return "SI Peephole SDWA"; }
96 
97  void getAnalysisUsage(AnalysisUsage &AU) const override {
98  AU.setPreservesCFG();
100  }
101 };
102 
/// Abstract base for a matched SDWA pattern: a pair of operands where
/// \p Target will appear in the converted SDWA instruction in place of
/// \p Replaced.
class SDWAOperand {
private:
  MachineOperand *Target; // Operand that would be used in converted instruction
  MachineOperand *Replaced; // Operand that would be replaced by Target

public:
  SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
      : Target(TargetOp), Replaced(ReplacedOp) {
    // Both sides of the substitution must be register operands.
    assert(Target->isReg());
    assert(Replaced->isReg());
  }

  virtual ~SDWAOperand() = default;

  /// Return the instruction this pattern could fold into, or nullptr.
  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
  /// Apply this pattern to the (already SDWA-form) instruction \p MI.
  /// Returns true on success.
  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;

  MachineOperand *getTargetOperand() const { return Target; }
  MachineOperand *getReplacedOperand() const { return Replaced; }
  MachineInstr *getParentInst() const { return Target->getParent(); }

  // Walk MI -> MBB -> MF to reach the register info.
  MachineRegisterInfo *getMRI() const {
    return &getParentInst()->getParent()->getParent()->getRegInfo();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  virtual void print(raw_ostream& OS) const = 0;
  void dump() const { print(dbgs()); }
#endif
};
133 
134 using namespace AMDGPU::SDWA;
135 
/// A matched SDWA *source* pattern: the target register should be read with
/// the given byte/word selection and optional abs/neg/sext modifiers.
class SDWASrcOperand : public SDWAOperand {
private:
  SdwaSel SrcSel;  // Which byte/word of the source register to read.
  bool Abs;        // Apply floating-point |x| modifier.
  bool Neg;        // Apply floating-point negation modifier.
  bool Sext;       // Sign-extend the selected sub-dword (integer ops).

public:
  SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
                 bool Sext_ = false)
      : SDWAOperand(TargetOp, ReplacedOp),
        SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}

  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  SdwaSel getSrcSel() const { return SrcSel; }
  bool getAbs() const { return Abs; }
  bool getNeg() const { return Neg; }
  bool getSext() const { return Sext; }

  /// Compute the src_modifiers immediate for \p SrcOp, combining the
  /// modifiers already on its instruction with this pattern's abs/neg/sext.
  uint64_t getSrcMods(const SIInstrInfo *TII,
                      const MachineOperand *SrcOp) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream& OS) const override;
#endif
};
165 
/// A matched SDWA *destination* pattern: the converted instruction should
/// write only the given byte/word of the target register, handling the
/// unused bits as specified by DstUn.
class SDWADstOperand : public SDWAOperand {
private:
  SdwaSel DstSel;   // Which byte/word of the destination is written.
  DstUnused DstUn;  // What happens to the unwritten bits (pad/sext/preserve).

public:

  SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
      : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}

  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  SdwaSel getDstSel() const { return DstSel; }
  DstUnused getDstUnused() const { return DstUn; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream& OS) const override;
#endif
};
187 
/// A destination pattern with dst_unused:UNUSED_PRESERVE — the bits not
/// written by the SDWA instruction are taken from \p Preserve (matched from
/// a v_or_b32 combining two partial results).
class SDWADstPreserveOperand : public SDWADstOperand {
private:
  MachineOperand *Preserve; // Register supplying the preserved (unwritten) bits.

public:
  SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                         MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)
      : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
        Preserve(PreserveOp) {}

  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  MachineOperand *getPreservedOperand() const { return Preserve; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream& OS) const override;
#endif
};
206 
207 } // end anonymous namespace
208 
209 INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
210 
211 char SIPeepholeSDWA::ID = 0;
212 
213 char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
214 
216  return new SIPeepholeSDWA();
217 }
218 
219 
220 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
222  switch(Sel) {
223  case BYTE_0: OS << "BYTE_0"; break;
224  case BYTE_1: OS << "BYTE_1"; break;
225  case BYTE_2: OS << "BYTE_2"; break;
226  case BYTE_3: OS << "BYTE_3"; break;
227  case WORD_0: OS << "WORD_0"; break;
228  case WORD_1: OS << "WORD_1"; break;
229  case DWORD: OS << "DWORD"; break;
230  }
231  return OS;
232 }
233 
234 static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
235  switch(Un) {
236  case UNUSED_PAD: OS << "UNUSED_PAD"; break;
237  case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
238  case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
239  }
240  return OS;
241 }
242 
// Stream an SDWAOperand by delegating to its virtual print() method.
static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) {
  Operand.print(OS);
  return OS;
}
247 
249 void SDWASrcOperand::print(raw_ostream& OS) const {
250  OS << "SDWA src: " << *getTargetOperand()
251  << " src_sel:" << getSrcSel()
252  << " abs:" << getAbs() << " neg:" << getNeg()
253  << " sext:" << getSext() << '\n';
254 }
255 
257 void SDWADstOperand::print(raw_ostream& OS) const {
258  OS << "SDWA dst: " << *getTargetOperand()
259  << " dst_sel:" << getDstSel()
260  << " dst_unused:" << getDstUnused() << '\n';
261 }
262 
265  OS << "SDWA preserve dst: " << *getTargetOperand()
266  << " dst_sel:" << getDstSel()
267  << " preserve:" << *getPreservedOperand() << '\n';
268 }
269 
270 #endif
271 
272 static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
273  assert(To.isReg() && From.isReg());
274  To.setReg(From.getReg());
275  To.setSubReg(From.getSubReg());
276  To.setIsUndef(From.isUndef());
277  if (To.isUse()) {
278  To.setIsKill(From.isKill());
279  } else {
280  To.setIsDead(From.isDead());
281  }
282 }
283 
284 static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
285  return LHS.isReg() &&
286  RHS.isReg() &&
287  LHS.getReg() == RHS.getReg() &&
288  LHS.getSubReg() == RHS.getSubReg();
289 }
290 
292  const MachineRegisterInfo *MRI) {
293  if (!Reg->isReg() || !Reg->isDef())
294  return nullptr;
295 
296  MachineOperand *ResMO = nullptr;
297  for (MachineOperand &UseMO : MRI->use_nodbg_operands(Reg->getReg())) {
298  // If there exist use of subreg of Reg then return nullptr
299  if (!isSameReg(UseMO, *Reg))
300  return nullptr;
301 
302  // Check that there is only one instruction that uses Reg
303  if (!ResMO) {
304  ResMO = &UseMO;
305  } else if (ResMO->getParent() != UseMO.getParent()) {
306  return nullptr;
307  }
308  }
309 
310  return ResMO;
311 }
312 
314  const MachineRegisterInfo *MRI) {
315  if (!Reg->isReg())
316  return nullptr;
317 
318  MachineInstr *DefInstr = MRI->getUniqueVRegDef(Reg->getReg());
319  if (!DefInstr)
320  return nullptr;
321 
322  for (auto &DefMO : DefInstr->defs()) {
323  if (DefMO.isReg() && DefMO.getReg() == Reg->getReg())
324  return &DefMO;
325  }
326 
327  // Ignore implicit defs.
328  return nullptr;
329 }
330 
331 uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
332  const MachineOperand *SrcOp) const {
333  uint64_t Mods = 0;
334  const auto *MI = SrcOp->getParent();
335  if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
336  if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
337  Mods = Mod->getImm();
338  }
339  } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
340  if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
341  Mods = Mod->getImm();
342  }
343  }
344  if (Abs || Neg) {
345  assert(!Sext &&
346  "Float and integer src modifiers can't be set simulteniously");
347  Mods |= Abs ? SISrcMods::ABS : 0;
348  Mods ^= Neg ? SISrcMods::NEG : 0;
349  } else if (Sext) {
350  Mods |= SISrcMods::SEXT;
351  }
352 
353  return Mods;
354 }
355 
356 MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
357  // For SDWA src operand potential instruction is one that use register
358  // defined by parent instruction
359  MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI());
360  if (!PotentialMO)
361  return nullptr;
362 
363  return PotentialMO->getParent();
364 }
365 
/// Fold this source pattern into the SDWA instruction \p MI: locate which of
/// MI's sources matches the replaced register, substitute the target
/// register there, and encode src_sel / src_modifiers. Returns false if the
/// match would land on a disallowed slot (src2 of v_mac).
bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
  // Find operand in instruction that matches source operand and replace it with
  // target operand. Set corresponding src_sel
  MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
  MachineOperand *SrcMods =
      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
  assert(Src && (Src->isReg() || Src->isImm()));
  if (!isSameReg(*Src, *getReplacedOperand())) {
    // If this is not src0 then it should be src1
    Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
    SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

    assert(Src && Src->isReg());

    if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
         MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
        !isSameReg(*Src, *getReplacedOperand())) {
      // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
      // src2. This is not allowed.
      return false;
    }

    assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods);
  }
  copyRegOperand(*Src, *getTargetOperand());
  SrcSel->setImm(getSrcSel());
  SrcMods->setImm(getSrcMods(TII, Src));
  // The target register now lives past its original last use; drop the
  // stale kill flag.
  getTargetOperand()->setIsKill(false);
  return true;
}
399 
400 MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
401  // For SDWA dst operand potential instruction is one that defines register
402  // that this operand uses
403  MachineRegisterInfo *MRI = getMRI();
404  MachineInstr *ParentMI = getParentInst();
405 
406  MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI);
407  if (!PotentialMO)
408  return nullptr;
409 
410  // Check that ParentMI is the only instruction that uses replaced register
411  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) {
412  if (&UseInst != ParentMI)
413  return nullptr;
414  }
415 
416  return PotentialMO->getParent();
417 }
418 
419 bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
420  // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
421 
422  if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
423  MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
424  getDstSel() != AMDGPU::SDWA::DWORD) {
425  // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
426  return false;
427  }
428 
429  MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
430  assert(Operand &&
431  Operand->isReg() &&
432  isSameReg(*Operand, *getReplacedOperand()));
433  copyRegOperand(*Operand, *getTargetOperand());
434  MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
435  assert(DstSel);
436  DstSel->setImm(getDstSel());
437  MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
438  assert(DstUnused);
439  DstUnused->setImm(getDstUnused());
440 
441  // Remove original instruction because it would conflict with our new
442  // instruction by register definition
443  getParentInst()->eraseFromParent();
444  return true;
445 }
446 
447 bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
448  const SIInstrInfo *TII) {
449  // MI should be moved right before v_or_b32.
450  // For this we should clear all kill flags on uses of MI src-operands or else
451  // we can encounter problem with use of killed operand.
452  for (MachineOperand &MO : MI.uses()) {
453  if (!MO.isReg())
454  continue;
455  getMRI()->clearKillFlags(MO.getReg());
456  }
457 
458  // Move MI before v_or_b32
459  auto MBB = MI.getParent();
460  MBB->remove(&MI);
461  MBB->insert(getParentInst(), &MI);
462 
463  // Add Implicit use of preserved register
464  MachineInstrBuilder MIB(*MBB->getParent(), MI);
465  MIB.addReg(getPreservedOperand()->getReg(),
467  getPreservedOperand()->getSubReg());
468 
469  // Tie dst to implicit use
470  MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),
471  MI.getNumOperands() - 1);
472 
473  // Convert MI as any other SDWADstOperand and remove v_or_b32
474  return SDWADstOperand::convertToSDWA(MI, TII);
475 }
476 
477 Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
478  if (Op.isImm()) {
479  return Op.getImm();
480  }
481 
482  // If this is not immediate then it can be copy of immediate value, e.g.:
483  // %1 = S_MOV_B32 255;
484  if (Op.isReg()) {
485  for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
486  if (!isSameReg(Op, Def))
487  continue;
488 
489  const MachineInstr *DefInst = Def.getParent();
490  if (!TII->isFoldableCopy(*DefInst))
491  return None;
492 
493  const MachineOperand &Copied = DefInst->getOperand(1);
494  if (!Copied.isImm())
495  return None;
496 
497  return Copied.getImm();
498  }
499  }
500 
501  return None;
502 }
503 
504 std::unique_ptr<SDWAOperand>
505 SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
506  unsigned Opcode = MI.getOpcode();
507  switch (Opcode) {
508  case AMDGPU::V_LSHRREV_B32_e32:
509  case AMDGPU::V_ASHRREV_I32_e32:
510  case AMDGPU::V_LSHLREV_B32_e32:
511  case AMDGPU::V_LSHRREV_B32_e64:
512  case AMDGPU::V_ASHRREV_I32_e64:
513  case AMDGPU::V_LSHLREV_B32_e64: {
514  // from: v_lshrrev_b32_e32 v1, 16/24, v0
515  // to SDWA src:v0 src_sel:WORD_1/BYTE_3
516 
517  // from: v_ashrrev_i32_e32 v1, 16/24, v0
518  // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
519 
520  // from: v_lshlrev_b32_e32 v1, 16/24, v0
521  // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
522  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
523  auto Imm = foldToImm(*Src0);
524  if (!Imm)
525  break;
526 
527  if (*Imm != 16 && *Imm != 24)
528  break;
529 
530  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
531  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
532  if (TRI->isPhysicalRegister(Src1->getReg()) ||
533  TRI->isPhysicalRegister(Dst->getReg()))
534  break;
535 
536  if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
537  Opcode == AMDGPU::V_LSHLREV_B32_e64) {
538  return make_unique<SDWADstOperand>(
539  Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
540  } else {
541  return make_unique<SDWASrcOperand>(
542  Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
543  Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
544  Opcode != AMDGPU::V_LSHRREV_B32_e64);
545  }
546  break;
547  }
548 
549  case AMDGPU::V_LSHRREV_B16_e32:
550  case AMDGPU::V_ASHRREV_I16_e32:
551  case AMDGPU::V_LSHLREV_B16_e32:
552  case AMDGPU::V_LSHRREV_B16_e64:
553  case AMDGPU::V_ASHRREV_I16_e64:
554  case AMDGPU::V_LSHLREV_B16_e64: {
555  // from: v_lshrrev_b16_e32 v1, 8, v0
556  // to SDWA src:v0 src_sel:BYTE_1
557 
558  // from: v_ashrrev_i16_e32 v1, 8, v0
559  // to SDWA src:v0 src_sel:BYTE_1 sext:1
560 
561  // from: v_lshlrev_b16_e32 v1, 8, v0
562  // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
563  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
564  auto Imm = foldToImm(*Src0);
565  if (!Imm || *Imm != 8)
566  break;
567 
568  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
569  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
570 
571  if (TRI->isPhysicalRegister(Src1->getReg()) ||
572  TRI->isPhysicalRegister(Dst->getReg()))
573  break;
574 
575  if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
576  Opcode == AMDGPU::V_LSHLREV_B16_e64) {
577  return make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
578  } else {
579  return make_unique<SDWASrcOperand>(
580  Src1, Dst, BYTE_1, false, false,
581  Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
582  Opcode != AMDGPU::V_LSHRREV_B16_e64);
583  }
584  break;
585  }
586 
587  case AMDGPU::V_BFE_I32:
588  case AMDGPU::V_BFE_U32: {
589  // e.g.:
590  // from: v_bfe_u32 v1, v0, 8, 8
591  // to SDWA src:v0 src_sel:BYTE_1
592 
593  // offset | width | src_sel
594  // ------------------------
595  // 0 | 8 | BYTE_0
596  // 0 | 16 | WORD_0
597  // 0 | 32 | DWORD ?
598  // 8 | 8 | BYTE_1
599  // 16 | 8 | BYTE_2
600  // 16 | 16 | WORD_1
601  // 24 | 8 | BYTE_3
602 
603  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
604  auto Offset = foldToImm(*Src1);
605  if (!Offset)
606  break;
607 
608  MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
609  auto Width = foldToImm(*Src2);
610  if (!Width)
611  break;
612 
613  SdwaSel SrcSel = DWORD;
614 
615  if (*Offset == 0 && *Width == 8)
616  SrcSel = BYTE_0;
617  else if (*Offset == 0 && *Width == 16)
618  SrcSel = WORD_0;
619  else if (*Offset == 0 && *Width == 32)
620  SrcSel = DWORD;
621  else if (*Offset == 8 && *Width == 8)
622  SrcSel = BYTE_1;
623  else if (*Offset == 16 && *Width == 8)
624  SrcSel = BYTE_2;
625  else if (*Offset == 16 && *Width == 16)
626  SrcSel = WORD_1;
627  else if (*Offset == 24 && *Width == 8)
628  SrcSel = BYTE_3;
629  else
630  break;
631 
632  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
633  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
634 
635  if (TRI->isPhysicalRegister(Src0->getReg()) ||
636  TRI->isPhysicalRegister(Dst->getReg()))
637  break;
638 
639  return make_unique<SDWASrcOperand>(
640  Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32);
641  }
642 
643  case AMDGPU::V_AND_B32_e32:
644  case AMDGPU::V_AND_B32_e64: {
645  // e.g.:
646  // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
647  // to SDWA src:v0 src_sel:WORD_0/BYTE_0
648 
649  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
650  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
651  auto ValSrc = Src1;
652  auto Imm = foldToImm(*Src0);
653 
654  if (!Imm) {
655  Imm = foldToImm(*Src1);
656  ValSrc = Src0;
657  }
658 
659  if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
660  break;
661 
662  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
663 
664  if (TRI->isPhysicalRegister(Src1->getReg()) ||
665  TRI->isPhysicalRegister(Dst->getReg()))
666  break;
667 
668  return make_unique<SDWASrcOperand>(
669  ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
670  }
671 
672  case AMDGPU::V_OR_B32_e32:
673  case AMDGPU::V_OR_B32_e64: {
674  // Patterns for dst_unused:UNUSED_PRESERVE.
675  // e.g., from:
676  // v_add_f16_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD
677  // src1_sel:WORD_1 src2_sel:WORD1
678  // v_add_f16_e32 v3, v1, v2
679  // v_or_b32_e32 v4, v0, v3
680  // to SDWA preserve dst:v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE preserve:v3
681 
682  // Check if one of operands of v_or_b32 is SDWA instruction
684  auto CheckOROperandsForSDWA =
685  [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {
686  if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())
687  return CheckRetType(None);
688 
689  MachineOperand *Op1Def = findSingleRegDef(Op1, MRI);
690  if (!Op1Def)
691  return CheckRetType(None);
692 
693  MachineInstr *Op1Inst = Op1Def->getParent();
694  if (!TII->isSDWA(*Op1Inst))
695  return CheckRetType(None);
696 
697  MachineOperand *Op2Def = findSingleRegDef(Op2, MRI);
698  if (!Op2Def)
699  return CheckRetType(None);
700 
701  return CheckRetType(std::make_pair(Op1Def, Op2Def));
702  };
703 
704  MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
705  MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
706  assert(OrSDWA && OrOther);
707  auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
708  if (!Res) {
709  OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
710  OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
711  assert(OrSDWA && OrOther);
712  Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
713  if (!Res)
714  break;
715  }
716 
717  MachineOperand *OrSDWADef = Res->first;
718  MachineOperand *OrOtherDef = Res->second;
719  assert(OrSDWADef && OrOtherDef);
720 
721  MachineInstr *SDWAInst = OrSDWADef->getParent();
722  MachineInstr *OtherInst = OrOtherDef->getParent();
723 
724  // Check that OtherInstr is actually bitwise compatible with SDWAInst = their
725  // destination patterns don't overlap. Compatible instruction can be either
726  // regular instruction with compatible bitness or SDWA instruction with
727  // correct dst_sel
728  // SDWAInst | OtherInst bitness / OtherInst dst_sel
729  // -----------------------------------------------------
730  // DWORD | no / no
731  // WORD_0 | no / BYTE_2/3, WORD_1
732  // WORD_1 | 8/16-bit instructions / BYTE_0/1, WORD_0
733  // BYTE_0 | no / BYTE_1/2/3, WORD_1
734  // BYTE_1 | 8-bit / BYTE_0/2/3, WORD_1
735  // BYTE_2 | 8/16-bit / BYTE_0/1/3. WORD_0
736  // BYTE_3 | 8/16/24-bit / BYTE_0/1/2, WORD_0
737  // E.g. if SDWAInst is v_add_f16_sdwa dst_sel:WORD_1 then v_add_f16 is OK
738  // but v_add_f32 is not.
739 
740  // TODO: add support for non-SDWA instructions as OtherInst.
741  // For now this only works with SDWA instructions. For regular instructions
742  // there is no way to determine if instruction write only 8/16/24-bit out of
743  // full register size and all registers are at min 32-bit wide.
744  if (!TII->isSDWA(*OtherInst))
745  break;
746 
747  SdwaSel DstSel = static_cast<SdwaSel>(
748  TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));;
749  SdwaSel OtherDstSel = static_cast<SdwaSel>(
750  TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));
751 
752  bool DstSelAgree = false;
753  switch (DstSel) {
754  case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||
755  (OtherDstSel == BYTE_3) ||
756  (OtherDstSel == WORD_1));
757  break;
758  case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
759  (OtherDstSel == BYTE_1) ||
760  (OtherDstSel == WORD_0));
761  break;
762  case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||
763  (OtherDstSel == BYTE_2) ||
764  (OtherDstSel == BYTE_3) ||
765  (OtherDstSel == WORD_1));
766  break;
767  case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
768  (OtherDstSel == BYTE_2) ||
769  (OtherDstSel == BYTE_3) ||
770  (OtherDstSel == WORD_1));
771  break;
772  case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||
773  (OtherDstSel == BYTE_1) ||
774  (OtherDstSel == BYTE_3) ||
775  (OtherDstSel == WORD_0));
776  break;
777  case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||
778  (OtherDstSel == BYTE_1) ||
779  (OtherDstSel == BYTE_2) ||
780  (OtherDstSel == WORD_0));
781  break;
782  default: DstSelAgree = false;
783  }
784 
785  if (!DstSelAgree)
786  break;
787 
788  // Also OtherInst dst_unused should be UNUSED_PAD
789  DstUnused OtherDstUnused = static_cast<DstUnused>(
790  TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));
791  if (OtherDstUnused != DstUnused::UNUSED_PAD)
792  break;
793 
794  // Create DstPreserveOperand
795  MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
796  assert(OrDst && OrDst->isReg());
797 
798  return make_unique<SDWADstPreserveOperand>(
799  OrDst, OrSDWADef, OrOtherDef, DstSel);
800 
801  }
802  }
803 
804  return std::unique_ptr<SDWAOperand>(nullptr);
805 }
806 
807 void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
808  for (MachineInstr &MI : MBB) {
809  if (auto Operand = matchSDWAOperand(MI)) {
810  DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');
811  SDWAOperands[&MI] = std::move(Operand);
812  ++NumSDWAPatternsFound;
813  }
814  }
815 }
816 
817 bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
818  const SISubtarget &ST) const {
819  // Check if this is already an SDWA instruction
820  unsigned Opc = MI.getOpcode();
821  if (TII->isSDWA(Opc))
822  return true;
823 
824  // Check if this instruction has opcode that supports SDWA
825  if (AMDGPU::getSDWAOp(Opc) == -1)
826  Opc = AMDGPU::getVOPe32(Opc);
827 
828  if (AMDGPU::getSDWAOp(Opc) == -1)
829  return false;
830 
831  if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
832  return false;
833 
834  if (TII->isVOPC(Opc)) {
835  if (!ST.hasSDWASdst()) {
836  const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
837  if (SDst && SDst->getReg() != AMDGPU::VCC)
838  return false;
839  }
840 
841  if (!ST.hasSDWAOutModsVOPC() &&
842  (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
843  TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
844  return false;
845 
846  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
847  !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
848  return false;
849  }
850 
851  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 ||
852  Opc == AMDGPU::V_MAC_F32_e32))
853  return false;
854 
855  return true;
856 }
857 
858 bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
859  const SDWAOperandsVector &SDWAOperands) {
860  // Convert to sdwa
861  int SDWAOpcode;
862  unsigned Opcode = MI.getOpcode();
863  if (TII->isSDWA(Opcode)) {
864  SDWAOpcode = Opcode;
865  } else {
866  SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
867  if (SDWAOpcode == -1)
868  SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
869  }
870  assert(SDWAOpcode != -1);
871 
872  const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
873 
874  // Create SDWA version of instruction MI and initialize its operands
875  MachineInstrBuilder SDWAInst =
876  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
877 
878  // Copy dst, if it is present in original then should also be present in SDWA
879  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
880  if (Dst) {
881  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
882  SDWAInst.add(*Dst);
883  } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
884  assert(Dst &&
885  AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
886  SDWAInst.add(*Dst);
887  } else {
888  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
889  SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
890  }
891 
892  // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
893  // src0_modifiers (except for v_nop_sdwa, but it can't get here)
894  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
895  assert(
896  Src0 &&
897  AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
898  AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
899  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
900  SDWAInst.addImm(Mod->getImm());
901  else
902  SDWAInst.addImm(0);
903  SDWAInst.add(*Src0);
904 
905  // Copy src1 if present, initialize src1_modifiers.
906  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
907  if (Src1) {
908  assert(
909  AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
910  AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
911  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
912  SDWAInst.addImm(Mod->getImm());
913  else
914  SDWAInst.addImm(0);
915  SDWAInst.add(*Src1);
916  }
917 
918  if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
919  SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
920  // v_mac_f16/32 has additional src2 operand tied to vdst
921  MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
922  assert(Src2);
923  SDWAInst.add(*Src2);
924  }
925 
926  // Copy clamp if present, initialize otherwise
927  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
928  MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
929  if (Clamp) {
930  SDWAInst.add(*Clamp);
931  } else {
932  SDWAInst.addImm(0);
933  }
934 
935  // Copy omod if present, initialize otherwise if needed
936  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
937  MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
938  if (OMod) {
939  SDWAInst.add(*OMod);
940  } else {
941  SDWAInst.addImm(0);
942  }
943  }
944 
945  // Copy dst_sel if present, initialize otherwise if needed
946  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
947  MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
948  if (DstSel) {
949  SDWAInst.add(*DstSel);
950  } else {
952  }
953  }
954 
955  // Copy dst_unused if present, initialize otherwise if needed
956  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
957  MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
958  if (DstUnused) {
959  SDWAInst.add(*DstUnused);
960  } else {
962  }
963  }
964 
965  // Copy src0_sel if present, initialize otherwise
966  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
967  MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
968  if (Src0Sel) {
969  SDWAInst.add(*Src0Sel);
970  } else {
972  }
973 
974  // Copy src1_sel if present, initialize otherwise if needed
975  if (Src1) {
976  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
977  MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
978  if (Src1Sel) {
979  SDWAInst.add(*Src1Sel);
980  } else {
982  }
983  }
984 
985  // Apply all sdwa operand patterns.
986  bool Converted = false;
987  for (auto &Operand : SDWAOperands) {
988  // There should be no intesection between SDWA operands and potential MIs
989  // e.g.:
990  // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
991  // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
992  // v_add_u32 v3, v4, v2
993  //
994  // In that example it is possible that we would fold 2nd instruction into 3rd
995  // (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that was
996  // already destroyed). So if SDWAOperand is also a potential MI then do not
997  // apply it.
998  if (PotentialMatches.count(Operand->getParentInst()) == 0)
999  Converted |= Operand->convertToSDWA(*SDWAInst, TII);
1000  }
1001  if (Converted) {
1002  ConvertedInstructions.push_back(SDWAInst);
1003  } else {
1004  SDWAInst->eraseFromParent();
1005  return false;
1006  }
1007 
1008  DEBUG(dbgs() << "Convert instruction:" << MI
1009  << "Into:" << *SDWAInst << '\n');
1010  ++NumSDWAInstructionsPeepholed;
1011 
1012  MI.eraseFromParent();
1013  return true;
1014 }
1015 
1016 // If an instruction was converted to SDWA it should not have immediates or SGPR
1017 // operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs.
1018 void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
1019  const SISubtarget &ST) const {
1020  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
1021  unsigned ConstantBusCount = 0;
1022  for (MachineOperand &Op : MI.explicit_uses()) {
1023  if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
1024  continue;
1025 
1026  unsigned I = MI.getOperandNo(&Op);
1027  if (Desc.OpInfo[I].RegClass == -1 ||
1028  !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
1029  continue;
1030 
1031  if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
1032  TRI->isSGPRReg(*MRI, Op.getReg())) {
1033  ++ConstantBusCount;
1034  continue;
1035  }
1036 
1037  unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1038  auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
1039  TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
1040  if (Op.isImm())
1041  Copy.addImm(Op.getImm());
1042  else if (Op.isReg())
1043  Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
1044  Op.getSubReg());
1045  Op.ChangeToRegister(VGPR, false);
1046  }
1047 }
1048 
1049 bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
1050  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
1051 
1052  if (!ST.hasSDWA() || skipFunction(MF.getFunction()))
1053  return false;
1054 
1055  MRI = &MF.getRegInfo();
1056  TRI = ST.getRegisterInfo();
1057  TII = ST.getInstrInfo();
1058 
1059  // Find all SDWA operands in MF.
1060  bool Ret = false;
1061  for (MachineBasicBlock &MBB : MF) {
1062  bool Changed = false;
1063  do {
1064  matchSDWAOperands(MBB);
1065 
1066  for (const auto &OperandPair : SDWAOperands) {
1067  const auto &Operand = OperandPair.second;
1068  MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
1069  if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) {
1070  PotentialMatches[PotentialMI].push_back(Operand.get());
1071  }
1072  }
1073 
1074  for (auto &PotentialPair : PotentialMatches) {
1075  MachineInstr &PotentialMI = *PotentialPair.first;
1076  convertToSDWA(PotentialMI, PotentialPair.second);
1077  }
1078 
1079  PotentialMatches.clear();
1080  SDWAOperands.clear();
1081 
1082  Changed = !ConvertedInstructions.empty();
1083 
1084  if (Changed)
1085  Ret = true;
1086  while (!ConvertedInstructions.empty())
1087  legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
1088  } while (Changed);
1089  }
1090 
1091  return Ret;
1092 }
const MachineInstrBuilder & add(const MachineOperand &MO) const
Interface definition for SIRegisterInfo.
A common definition of LaneBitmask for use in TableGen and CodeGen.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
FunctionPass * createSIPeepholeSDWAPass()
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:368
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
char & SIPeepholeSDWAID
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:271
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
iterator_range< use_nodbg_iterator > use_nodbg_operands(unsigned Reg) const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:387
void setIsUndef(bool Val=true)
const SIInstrInfo * getInstrInfo() const override
unsigned getSubReg() const
static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS)
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
Definition: SIInstrInfo.h:822
static MachineOperand * findSingleRegDef(const MachineOperand *Reg, const MachineRegisterInfo *MRI)
STATISTIC(NumFunctions, "Total number of functions")
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
#define DEBUG_TYPE
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:296
static MachineOperand * findSingleRegUse(const MachineOperand *Reg, const MachineRegisterInfo *MRI)
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:293
bool hasSDWAOutModsVOPC() const
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
iterator_range< def_iterator > def_operands(unsigned Reg) const
LLVM_READONLY int getSDWAOp(uint16_t Opcode)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
unsigned const MachineRegisterInfo * MRI
bool isFoldableCopy(const MachineInstr &MI) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static void copyRegOperand(MachineOperand &To, const MachineOperand &From)
bool hasVGPRs(const TargetRegisterClass *RC) const
void initializeSIPeepholeSDWAPass(PassRegistry &)
Represent the analysis usage information of a pass.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:357
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
self_iterator getIterator()
Definition: ilist_node.h:82
iterator_range< mop_iterator > explicit_uses()
Definition: MachineInstr.h:377
void setIsKill(bool Val=true)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:34
LLVM_READONLY int getVOPe32(uint16_t Opcode)
const SIRegisterInfo * getRegisterInfo() const override
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:383
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:285
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
int64_t getImm() const
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Target - Wrapper for Target specific information.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
The access may modify the value stored in memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:142
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:60
Interface definition for SIInstrInfo.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
static bool isVOPC(const MachineInstr &MI)
Definition: SIInstrInfo.h:390
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:76
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:58
void setSubReg(unsigned subReg)
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2018
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSDWA(const MachineInstr &MI)
Definition: SIInstrInfo.h:382
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:174
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool hasSDWAScalar() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:298
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(unsigned Reg) const
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.