//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-shrink-instructions"

STATISTIC(NumInstructionsShrunk,
          "Number of 64-bit instructions reduced to 32-bit.");
STATISTIC(NumLiteralConstantsFolded,
          "Number of literal constants folded into 32-bit instructions.");

using namespace llvm;

namespace {

class SIShrinkInstructions : public MachineFunctionPass {
public:
  static char ID;

  void shrinkMIMG(MachineInstr &MI);

  SIShrinkInstructions() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Shrink Instructions"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
                "SI Shrink Instructions", false, false)

char SIShrinkInstructions::ID = 0;

FunctionPass *llvm::createSIShrinkInstructionsPass() {
  return new SIShrinkInstructions();
}

/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
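/// A minimal sketch of the pattern handled (register numbers illustrative
/// only):
///   %1 = V_MOV_B32_e32 0x12345678      ; literal materialized by a mov
///   %2 = V_ADD_F32_e32 %1, %0          ; sole use of %1
/// becomes
///   %2 = V_ADD_F32_e32 0x12345678, %0  ; literal folded, mov erased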
static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
                           MachineRegisterInfo &MRI, bool TryToCommute = true) {
  assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  // Try to fold Src0
  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  if (Src0.isReg()) {
    unsigned Reg = Src0.getReg();
    if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
      MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
      if (Def && Def->isMoveImmediate()) {
        MachineOperand &MovSrc = Def->getOperand(1);
        bool ConstantFolded = false;

        if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
                               isUInt<32>(MovSrc.getImm()))) {
          // It's possible to have only one component of a super-reg defined by
          // a single mov, so we need to clear any subregister flag.
          Src0.setSubReg(0);
          Src0.ChangeToImmediate(MovSrc.getImm());
          ConstantFolded = true;
        } else if (MovSrc.isFI()) {
          Src0.setSubReg(0);
          Src0.ChangeToFrameIndex(MovSrc.getIndex());
          ConstantFolded = true;
        }

        if (ConstantFolded) {
          assert(MRI.use_empty(Reg));
          Def->eraseFromParent();
          ++NumLiteralConstantsFolded;
          return true;
        }
      }
    }
  }

  // We have failed to fold src0, so commute the instruction and try again.
  if (TryToCommute && MI.isCommutable()) {
    if (TII->commuteInstruction(MI)) {
      if (foldImmediates(MI, TII, MRI, false))
        return true;

      // Commute back.
      TII->commuteInstruction(MI);
    }
  }

  return false;
}

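// A "KImm" operand is a literal that fits in the 16-bit immediate field of a
// SOPK-form instruction. Inline constants are excluded: they are already free
// in the regular 32-bit encoding, so rewriting them into the K-form would not
// save anything.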
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
  return isInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(),
                                Src.getParent()->getOperandNo(&Src));
}

static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
  return isUInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(),
                                Src.getParent()->getOperandNo(&Src));
}

static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
                                 const MachineOperand &Src,
                                 bool &IsUnsigned) {
  if (isInt<16>(Src.getImm())) {
    IsUnsigned = false;
    return !TII->isInlineConstant(Src);
  }

  if (isUInt<16>(Src.getImm())) {
    IsUnsigned = true;
    return !TII->isInlineConstant(Src);
  }

  return false;
}

/// \returns true if the constant in \p Src should be replaced with a bitreverse
/// of an inline immediate.
static bool isReverseInlineImm(const SIInstrInfo *TII,
                               const MachineOperand &Src,
                               int32_t &ReverseImm) {
  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
    return false;

  ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
  return ReverseImm >= -16 && ReverseImm <= 64;
}
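
// For example, materializing the sign-bit mask 0x80000000 normally costs a
// 4-byte literal, but reverseBits(0x80000000) == 1 is an inline immediate:
//   v_mov_b32 v0, 0x80000000   ->   v_bfrev_b32 v0, 1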

/// Copy the implicit register operands of \p MI that are not part of its
/// instruction definition onto \p NewMI.
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
                                 const MachineInstr &MI) {
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().getNumImplicitUses() +
                    MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
      NewMI.addOperand(MF, MO);
  }
}

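// Rewrite a 32-bit SOPC compare against a literal into its SOPK form, which
// carries the constant in a 16-bit immediate field. A sketch of the rewrite
// (register names illustrative):
//   s_cmp_lg_u32 s0, 0x1234   ->   s_cmpk_lg_u32 s0, 0x1234
// saving the 4-byte literal dword.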
static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
  // get constants on the RHS.
  if (!MI.getOperand(0).isReg())
    TII->commuteInstruction(MI, false, 0, 1);

  const MachineOperand &Src1 = MI.getOperand(1);
  if (!Src1.isImm())
    return;

  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
  if (SOPKOpc == -1)
    return;

  // eq/ne is special because the imm16 can be treated as signed or unsigned,
  // and initially selected to the unsigned versions.
  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
    bool HasUImm;
    if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
      if (!HasUImm) {
        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
          AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
      }

      MI.setDesc(TII->get(SOPKOpc));
    }

    return;
  }

  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);

  if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
      (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
    MI.setDesc(NewDesc);
  }
}

// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
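// For example (registers illustrative), a GFX10 NSA image_sample whose address
// operands happen to land in v4, v5, v6 already forms a contiguous range, so
// it can be re-encoded in the default form with v[4:6] as a single base
// register, dropping the extra NSA address dwords.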
void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
    return;

  MachineFunction *MF = MI.getParent()->getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  unsigned NewAddrDwords = Info->VAddrDwords;
  const TargetRegisterClass *RC;

  if (Info->VAddrDwords == 2) {
    RC = &AMDGPU::VReg_64RegClass;
  } else if (Info->VAddrDwords == 3) {
    RC = &AMDGPU::VReg_96RegClass;
  } else if (Info->VAddrDwords == 4) {
    RC = &AMDGPU::VReg_128RegClass;
  } else if (Info->VAddrDwords <= 8) {
    RC = &AMDGPU::VReg_256RegClass;
    NewAddrDwords = 8;
  } else {
    RC = &AMDGPU::VReg_512RegClass;
    NewAddrDwords = 16;
  }

  unsigned VgprBase = 0;
  bool IsUndef = true;
  bool IsKill = NewAddrDwords == Info->VAddrDwords;
  for (unsigned i = 0; i < Info->VAddrDwords; ++i) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + i);
    unsigned Vgpr = TRI.getHWRegIndex(Op.getReg());

    if (i == 0) {
      VgprBase = Vgpr;
    } else if (VgprBase + i != Vgpr)
      return;

    if (!Op.isUndef())
      IsUndef = false;
    if (!Op.isKill())
      IsKill = false;
  }

  if (VgprBase + NewAddrDwords > 256)
    return;

  // Further check for implicit tied operands - this may be present if TFE is
  // enabled
  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
  int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
  unsigned TFEVal = MI.getOperand(TFEIdx).getImm();
  unsigned LWEVal = MI.getOperand(LWEIdx).getImm();
  int ToUntie = -1;
  if (TFEVal || LWEVal) {
    // TFE/LWE is enabled so we need to deal with an implicit tied operand
    for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
      if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
          MI.getOperand(i).isImplicit()) {
        // This is the tied operand
        assert(
            ToUntie == -1 &&
            "found more than one tied implicit operand when expecting only 1");
        ToUntie = i;
        MI.untieRegOperand(ToUntie);
      }
    }
  }

  unsigned NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, AMDGPU::MIMGEncGfx10Default,
                            Info->VDataDwords, NewAddrDwords);
  MI.setDesc(TII->get(NewOpcode));
  MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
  MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
  MI.getOperand(VAddr0Idx).setIsKill(IsKill);

  // Trailing operands shift down after each removal, so deleting at the same
  // index strips all of the extra address operands.
  for (unsigned i = 1; i < Info->VAddrDwords; ++i)
    MI.RemoveOperand(VAddr0Idx + 1);

  if (ToUntie >= 0) {
    MI.tieOperands(
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
        ToUntie - (Info->VAddrDwords - 1));
  }
}

/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
/// \returns true if the caller should continue the machine function iterator
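/// A few concrete rewrites this enables (register names illustrative; each
/// right-hand side replaces a 4-byte literal with an inline operand):
///   s_and_b32 s0, s0, 0xffffffef   ->   s_bitset0_b32 s0, 4
///   s_or_b32  s0, s0, 0x00010000   ->   s_bitset1_b32 s0, 16
///   s_and_b32 s0, s0, 0xffffffc0   ->   s_andn2_b32 s0, s0, 63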
static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
                                MachineRegisterInfo &MRI,
                                const SIInstrInfo *TII,
                                MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const MachineOperand *Dest = &MI.getOperand(0);
  MachineOperand *Src0 = &MI.getOperand(1);
  MachineOperand *Src1 = &MI.getOperand(2);
  MachineOperand *SrcReg = Src0;
  MachineOperand *SrcImm = Src1;

  if (SrcImm->isImm() &&
      !AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm())) {
    uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
    uint32_t NewImm = 0;

    if (Opc == AMDGPU::S_AND_B32) {
      if (isPowerOf2_32(~Imm)) {
        NewImm = countTrailingOnes(Imm);
        Opc = AMDGPU::S_BITSET0_B32;
      } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
        NewImm = ~Imm;
        Opc = AMDGPU::S_ANDN2_B32;
      }
    } else if (Opc == AMDGPU::S_OR_B32) {
      if (isPowerOf2_32(Imm)) {
        NewImm = countTrailingZeros(Imm);
        Opc = AMDGPU::S_BITSET1_B32;
      } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
        NewImm = ~Imm;
        Opc = AMDGPU::S_ORN2_B32;
      }
    } else if (Opc == AMDGPU::S_XOR_B32) {
      if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
        NewImm = ~Imm;
        Opc = AMDGPU::S_XNOR_B32;
      }
    } else {
      llvm_unreachable("unexpected opcode");
    }

    if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
        SrcImm == Src0) {
      if (!TII->commuteInstruction(MI, false, 1, 2))
        NewImm = 0;
    }

    if (NewImm != 0) {
      if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
          SrcReg->isReg()) {
        MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
        MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
        return true;
      }

      if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
        MI.setDesc(TII->get(Opc));
        if (Opc == AMDGPU::S_BITSET0_B32 ||
            Opc == AMDGPU::S_BITSET1_B32) {
          Src0->ChangeToImmediate(NewImm);
          // Remove the immediate and add the tied input.
          MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
          MI.tieOperands(0, 2);
        } else {
          SrcImm->setImm(NewImm);
        }
      }
    }
  }

  return false;
}

// This is the same as MachineInstr::readsRegister/modifiesRegister except
// it takes subregs into account.
static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                          unsigned Reg, unsigned SubReg,
                          const SIRegisterInfo &TRI) {
  for (const MachineOperand &MO : R) {
    if (!MO.isReg())
      continue;

    if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
        TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
      if (TRI.regsOverlap(Reg, MO.getReg()))
        return true;
    } else if (MO.getReg() == Reg &&
               TargetRegisterInfo::isVirtualRegister(Reg)) {
      LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
                            TRI.getSubRegIndexLaneMask(MO.getSubReg());
      if (Overlap.any())
        return true;
    }
  }
  return false;
}

static bool instReadsReg(const MachineInstr *MI,
                         unsigned Reg, unsigned SubReg,
                         const SIRegisterInfo &TRI) {
  return instAccessReg(MI->uses(), Reg, SubReg, TRI);
}

static bool instModifiesReg(const MachineInstr *MI,
                            unsigned Reg, unsigned SubReg,
                            const SIRegisterInfo &TRI) {
  return instAccessReg(MI->defs(), Reg, SubReg, TRI);
}

static TargetInstrInfo::RegSubRegPair
getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
                  const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
  if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
    } else {
      LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
      Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger()));
    }
  }
  return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}

// Match:
// mov t, x
// mov x, y
// mov y, t
//
// =>
//
// mov t, x (t is potentially dead and move eliminated)
// v_swap_b32 x, y
//
// Returns a pointer to the next valid instruction if it was able to create
// v_swap_b32.
//
// This must not be done too early, or it may prevent folding that would
// remove the matched moves. It should preferably be done before RA to
// release the saved registers, and possibly again after RA, which can insert
// copies too.
//
// This is really just a generic peephole that is not a canonical shrinking,
// although the requirements match the pass placement and it reduces code
// size too.
static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
                               const SIInstrInfo *TII) {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  unsigned T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  if (!Xop.isReg())
    return nullptr;
  unsigned X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  unsigned Size = TII->getOpSize(MovT, 0) / 4;

  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  if (!TRI.isVGPR(MRI, X))
    return nullptr;

  for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
    if (YTop.getSubReg() != Tsub)
      continue;

    MachineInstr &MovY = *YTop.getParent();
    if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
         MovY.getOpcode() != AMDGPU::COPY) ||
        MovY.getOperand(1).getSubReg() != Tsub)
      continue;

    unsigned Y = MovY.getOperand(0).getReg();
    unsigned Ysub = MovY.getOperand(0).getSubReg();

    if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
      continue;

    MachineInstr *MovX = nullptr;
    auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
    for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
      if (instReadsReg(&*I, X, Xsub, TRI) ||
          instModifiesReg(&*I, Y, Ysub, TRI) ||
          instModifiesReg(&*I, T, Tsub, TRI) ||
          (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
        MovX = nullptr;
        break;
      }
      if (!instReadsReg(&*I, Y, Ysub, TRI)) {
        if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
          MovX = nullptr;
          break;
        }
        continue;
      }
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }
      MovX = &*I;
    }

    if (!MovX || I == E)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);

    for (unsigned I = 0; I < Size; ++I) {
      TargetInstrInfo::RegSubRegPair X1, Y1;
      X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
      Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
      BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
              TII->get(AMDGPU::V_SWAP_B32))
        .addDef(X1.Reg, 0, X1.SubReg)
        .addDef(Y1.Reg, 0, Y1.SubReg)
        .addReg(Y1.Reg, 0, Y1.SubReg)
        .addReg(X1.Reg, 0, X1.SubReg).getInstr();
    }
    MovX->eraseFromParent();
    MovY.eraseFromParent();
    MachineInstr *Next = &*std::next(MovT.getIterator());
    if (MRI.use_nodbg_empty(T))
      MovT.eraseFromParent();
    else
      Xop.setIsKill(false);

    return Next;
  }

  return nullptr;
}

bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned VCCReg = ST.isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;

  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
        // If this has a literal constant source that is the same as the
        // reversed bits of an inline immediate, replace with a bitreverse of
        // that constant. This saves 4 bytes in the common case of
        // materializing sign bits.

        // Test if we are after regalloc. We only want to do this after any
        // optimizations happen because this will confuse them.
        // XXX - not exactly a check for post-regalloc run.
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
          int32_t ReverseImm;
          if (isReverseInlineImm(TII, Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
            Src.setImm(ReverseImm);
            continue;
          }
        }
      }

      if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                           MI.getOpcode() == AMDGPU::COPY)) {
        if (auto *NextMI = matchSwap(MI, MRI, TII)) {
          Next = NextMI->getIterator();
          continue;
        }
      }

      // Combine adjacent s_nops to use the immediate operand encoding how long
      // to wait.
      //
      // s_nop N
      // s_nop M
      // =>
      // s_nop (N + M)
      if (MI.getOpcode() == AMDGPU::S_NOP &&
          Next != MBB.end() &&
          (*Next).getOpcode() == AMDGPU::S_NOP) {

        MachineInstr &NextMI = *Next;
        // The instruction encodes the amount to wait with an offset of 1,
        // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
        // after adding.
        uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
        uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;

        // Make sure we don't overflow the bounds.
        if (Nop0 + Nop1 <= 8) {
          NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
          MI.eraseFromParent();
        }

        continue;
      }

      // FIXME: We also need to consider movs of constant operands since
      // immediate operands are not folded if they have more than one use, and
      // the operand folding pass is unaware if the immediate will be free
      // since it won't know if the src == dest constraint will end up being
      // satisfied.
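      // For example (registers illustrative), when the add already reads and
      // writes the same SGPR, the K-form encodes the constant inline:
      //   s_add_i32 s0, s0, 0x1234   ->   s_addk_i32 s0, 0x1234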
      if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
          MI.getOpcode() == AMDGPU::S_MUL_I32) {
        const MachineOperand *Dest = &MI.getOperand(0);
        MachineOperand *Src0 = &MI.getOperand(1);
        MachineOperand *Src1 = &MI.getOperand(2);

        if (!Src0->isReg() && Src1->isReg()) {
          if (TII->commuteInstruction(MI, false, 1, 2))
            std::swap(Src0, Src1);
        }

        // FIXME: This could work better if hints worked with subregisters. If
        // we have a vector add of a constant, we usually don't get the correct
        // allocation due to the subregister usage.
        if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
            Src0->isReg()) {
          MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
          MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
          continue;
        }

        if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
          if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
            unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
              AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;

            MI.setDesc(TII->get(Opc));
            MI.tieOperands(0, 1);
          }
        }
      }

      // Try to use s_cmpk_*
      if (MI.isCompare() && TII->isSOPC(MI)) {
        shrinkScalarCompare(TII, MI);
        continue;
      }

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
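      // e.g. (registers illustrative):
      //   s_mov_b32 s0, 0x1234       ->   s_movk_i32 s0, 0x1234
      //   s_mov_b32 s0, 0x80000000   ->   s_brev_b32 s0, 1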
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Dst = MI.getOperand(0);
        MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) {
          int32_t ReverseImm;
          if (isKImmOperand(TII, Src))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
          else if (isReverseInlineImm(TII, Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
            Src.setImm(ReverseImm);
          }
        }

        continue;
      }

      // Shrink scalar logic operations.
      if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
          MI.getOpcode() == AMDGPU::S_OR_B32 ||
          MI.getOpcode() == AMDGPU::S_XOR_B32) {
        if (shrinkScalarLogicOp(ST, MRI, TII, MI))
          continue;
      }

      if (TII->isMIMG(MI.getOpcode()) &&
          ST.getGeneration() >= AMDGPUSubtarget::GFX10 &&
          MF.getProperties().hasProperty(
              MachineFunctionProperties::Property::NoVRegs)) {
        shrinkMIMG(MI);
        continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      if (!TII->canShrink(MI, MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
            !TII->canShrink(MI, MRI))
          continue;
      }

      // getVOPe32 could be -1 here if we started with an instruction that had
      // a 32-bit encoding and then commuted it to an instruction that did not.
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register. We can't
          // force them to use VCC here, because this is only one register and
          // cannot deal with sequences which would require multiple copies of
          // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
          //
          // So, instead of forcing the instruction to write to VCC, we provide
          // a hint to the register allocator to use VCC and then we will run
          // this pass again after RA and shrink it if it outputs to VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, VCCReg);
          continue;
        }
        if (DstReg != VCCReg)
          continue;
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        unsigned SReg = Src2->getReg();
        if (TargetRegisterInfo::isVirtualRegister(SReg)) {
          MRI.setRegAllocationHint(SReg, 0, VCCReg);
          continue;
        }
        if (SReg != VCCReg)
          continue;
      }

      // Check for the bool flag output for instructions like V_ADD_I32_e64.
      const MachineOperand *SDst = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::sdst);

      // Check the carry-in operand for v_addc_u32_e64.
      const MachineOperand *Src2 = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::src2);

      if (SDst) {
        bool Next = false;

        if (SDst->getReg() != VCCReg) {
          if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
            MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg);
          Next = true;
        }

        // All of the instructions with carry outs also have an SGPR input in
        // src2.
        if (Src2 && Src2->getReg() != VCCReg) {
          if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
            MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg);
          Next = true;
        }

        if (Next)
          continue;
      }

      // We can shrink this instruction
      LLVM_DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
      ++NumInstructionsShrunk;

      // Copy extra operands not present in the instruction definition.
      copyExtraImplicitOps(*Inst32, MF, MI);

      MI.eraseFromParent();
      foldImmediates(*Inst32, TII, MRI);

      LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}