LLVM 9.0.0svn
SIShrinkInstructions.cpp
//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-shrink-instructions"

STATISTIC(NumInstructionsShrunk,
          "Number of 64-bit instructions reduced to 32-bit.");
STATISTIC(NumLiteralConstantsFolded,
          "Number of literal constants folded into 32-bit instructions.");

using namespace llvm;
namespace {

class SIShrinkInstructions : public MachineFunctionPass {
public:
  static char ID;

public:
  SIShrinkInstructions() : MachineFunctionPass(ID) {
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Shrink Instructions"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
                "SI Shrink Instructions", false, false)

char SIShrinkInstructions::ID = 0;

FunctionPass *llvm::createSIShrinkInstructionsPass() {
  return new SIShrinkInstructions();
}

/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
                           MachineRegisterInfo &MRI, bool TryToCommute = true) {
  assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  // Try to fold Src0
  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  if (Src0.isReg()) {
    unsigned Reg = Src0.getReg();
    if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
      MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
      if (Def && Def->isMoveImmediate()) {
        MachineOperand &MovSrc = Def->getOperand(1);
        bool ConstantFolded = false;

        if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
                               isUInt<32>(MovSrc.getImm()))) {
          // It's possible to have only one component of a super-reg defined by
          // a single mov, so we need to clear any subregister flag.
          Src0.setSubReg(0);
          Src0.ChangeToImmediate(MovSrc.getImm());
          ConstantFolded = true;
        } else if (MovSrc.isFI()) {
          Src0.setSubReg(0);
          Src0.ChangeToFrameIndex(MovSrc.getIndex());
          ConstantFolded = true;
        }

        if (ConstantFolded) {
          assert(MRI.use_empty(Reg));
          Def->eraseFromParent();
          ++NumLiteralConstantsFolded;
          return true;
        }
      }
    }
  }

  // We have failed to fold src0, so commute the instruction and try again.
  if (TryToCommute && MI.isCommutable()) {
    if (TII->commuteInstruction(MI)) {
      if (foldImmediates(MI, TII, MRI, false))
        return true;

      // Commute back.
      TII->commuteInstruction(MI);
    }
  }

  return false;
}
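
// Illustrative sketch (not part of the original source): given a literal
// materialized by a single-use move, foldImmediates rewrites, in MIR terms,
//
//   %0:vgpr_32 = V_MOV_B32_e32 0x11223344, implicit $exec
//   %1:vgpr_32 = V_ADD_F32_e32 %0, %2, implicit $exec
//
// into
//
//   %1:vgpr_32 = V_ADD_F32_e32 0x11223344, %2, implicit $exec
//
// and erases the now-dead V_MOV_B32_e32.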

static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
  return isInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(),
                                Src.getParent()->getOperandNo(&Src));
}

static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
  return isUInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(),
                                Src.getParent()->getOperandNo(&Src));
}

static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
                                 const MachineOperand &Src,
                                 bool &IsUnsigned) {
  if (isInt<16>(Src.getImm())) {
    IsUnsigned = false;
    return !TII->isInlineConstant(Src);
  }

  if (isUInt<16>(Src.getImm())) {
    IsUnsigned = true;
    return !TII->isInlineConstant(Src);
  }

  return false;
}

/// \returns true if the constant in \p Src should be replaced with a bitreverse
/// of an inline immediate.
static bool isReverseInlineImm(const SIInstrInfo *TII,
                               const MachineOperand &Src,
                               int32_t &ReverseImm) {
  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
    return false;

  ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
  return ReverseImm >= -16 && ReverseImm <= 64;
}
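
// Worked example (added for illustration): 0x80000000 is not an inline
// immediate, but reverseBits<int32_t>(0x80000000) == 1, which is. The caller
// can therefore emit a bit-reverse of 1 instead of spending a 4-byte literal.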

/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
                                 const MachineInstr &MI) {
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().getNumImplicitUses() +
                    MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
      NewMI.addOperand(MF, MO);
  }
}

static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
  // get constants on the RHS.
  if (!MI.getOperand(0).isReg())
    TII->commuteInstruction(MI, false, 0, 1);

  const MachineOperand &Src1 = MI.getOperand(1);
  if (!Src1.isImm())
    return;

  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
  if (SOPKOpc == -1)
    return;

  // eq/ne is special because the imm16 can be treated as signed or unsigned,
  // and initially selected to the unsigned versions.
  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
    bool HasUImm;
    if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
      if (!HasUImm) {
        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
                     AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
      }

      MI.setDesc(TII->get(SOPKOpc));
    }

    return;
  }

  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);

  if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
      (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
    MI.setDesc(NewDesc);
  }
}
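
// Example transformation (illustrative, not from the original file):
//
//   s_cmp_gt_i32 s0, 0x1234      ; 8 bytes: opcode dword + 32-bit literal
// becomes
//   s_cmpk_gt_i32 s0, 0x1234     ; 4 bytes: imm16 lives inside the encoding
//
// since 0x1234 fits in a signed 16-bit immediate and is not an inline constant.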

/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
/// \returns true if the caller should continue the machine function iterator
static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
                                MachineRegisterInfo &MRI,
                                const SIInstrInfo *TII,
                                MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const MachineOperand *Dest = &MI.getOperand(0);
  MachineOperand *Src0 = &MI.getOperand(1);
  MachineOperand *Src1 = &MI.getOperand(2);
  MachineOperand *SrcReg = Src0;
  MachineOperand *SrcImm = Src1;

  if (SrcImm->isImm() &&
      !AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm())) {
    uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
    uint32_t NewImm = 0;

    if (Opc == AMDGPU::S_AND_B32) {
      if (isPowerOf2_32(~Imm)) {
        NewImm = countTrailingOnes(Imm);
        Opc = AMDGPU::S_BITSET0_B32;
      } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
        NewImm = ~Imm;
        Opc = AMDGPU::S_ANDN2_B32;
      }
    } else if (Opc == AMDGPU::S_OR_B32) {
      if (isPowerOf2_32(Imm)) {
        NewImm = countTrailingZeros(Imm);
        Opc = AMDGPU::S_BITSET1_B32;
      } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
        NewImm = ~Imm;
        Opc = AMDGPU::S_ORN2_B32;
      }
    } else if (Opc == AMDGPU::S_XOR_B32) {
      if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
        NewImm = ~Imm;
        Opc = AMDGPU::S_XNOR_B32;
      }
    } else {
      llvm_unreachable("unexpected opcode");
    }

    if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
        SrcImm == Src0) {
      if (!TII->commuteInstruction(MI, false, 1, 2))
        NewImm = 0;
    }

    if (NewImm != 0) {
      if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
          SrcReg->isReg()) {
        MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
        MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
        return true;
      }

      if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
        MI.setDesc(TII->get(Opc));
        if (Opc == AMDGPU::S_BITSET0_B32 ||
            Opc == AMDGPU::S_BITSET1_B32) {
          Src0->ChangeToImmediate(NewImm);
          MI.RemoveOperand(2);
        } else {
          SrcImm->setImm(NewImm);
        }
      }
    }
  }

  return false;
}
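
// Example transformations (illustrative, not from the original file):
//
//   s_and_b32 s0, s0, 0xffffffef   ->  s_bitset0_b32 s0, 4
//     (~0xffffffef == 0x10 is a power of two, so bit 4 is cleared)
//   s_and_b32 s0, s0, 0xffffffc0   ->  s_andn2_b32 s0, s0, 63
//     (~0xffffffc0 == 63, which is an inline constant)
//
// Either form drops the 32-bit literal dword from the encoding.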

// This is the same as MachineInstr::readsRegister/modifiesRegister except
// it takes subregs into account.
static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                          unsigned Reg, unsigned SubReg,
                          const SIRegisterInfo &TRI) {
  for (const MachineOperand &MO : R) {
    if (!MO.isReg())
      continue;

    if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
        TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
      if (TRI.regsOverlap(Reg, MO.getReg()))
        return true;
    } else if (MO.getReg() == Reg &&
               TargetRegisterInfo::isVirtualRegister(Reg)) {
      LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
                            TRI.getSubRegIndexLaneMask(MO.getSubReg());
      if (Overlap.any())
        return true;
    }
  }
  return false;
}

static bool instReadsReg(const MachineInstr *MI,
                         unsigned Reg, unsigned SubReg,
                         const SIRegisterInfo &TRI) {
  return instAccessReg(MI->uses(), Reg, SubReg, TRI);
}

static bool instModifiesReg(const MachineInstr *MI,
                            unsigned Reg, unsigned SubReg,
                            const SIRegisterInfo &TRI) {
  return instAccessReg(MI->defs(), Reg, SubReg, TRI);
}
325 
327 getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
328  const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
329  if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
331  Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
332  } else {
333  LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
335  }
336  }
337  return TargetInstrInfo::RegSubRegPair(Reg, Sub);
338 }

// Match:
// mov t, x
// mov x, y
// mov y, t
//
// =>
//
// mov t, x (t is potentially dead and move eliminated)
// v_swap_b32 x, y
//
// Returns a pointer to the next valid instruction if it was able to create a
// v_swap_b32.
//
// This must not be done too early, so as not to prevent possible folding that
// may remove the matched moves. It should preferably be done before RA, to
// release saved registers, but possibly also after RA, which can insert copies
// too.
//
// This is really just a generic peephole rather than a canonical shrinking,
// although the requirements match the pass placement, and it reduces code size
// too.
static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
                               const SIInstrInfo *TII) {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  unsigned T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  if (!Xop.isReg())
    return nullptr;
  unsigned X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  unsigned Size = TII->getOpSize(MovT, 0) / 4;

  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  if (!TRI.isVGPR(MRI, X))
    return nullptr;

  for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
    if (YTop.getSubReg() != Tsub)
      continue;

    MachineInstr &MovY = *YTop.getParent();
    if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
         MovY.getOpcode() != AMDGPU::COPY) ||
        MovY.getOperand(1).getSubReg() != Tsub)
      continue;

    unsigned Y = MovY.getOperand(0).getReg();
    unsigned Ysub = MovY.getOperand(0).getSubReg();

    if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
      continue;

    MachineInstr *MovX = nullptr;
    auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
    for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
      if (instReadsReg(&*I, X, Xsub, TRI) ||
          instModifiesReg(&*I, Y, Ysub, TRI) ||
          instModifiesReg(&*I, T, Tsub, TRI) ||
          (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
        MovX = nullptr;
        break;
      }
      if (!instReadsReg(&*I, Y, Ysub, TRI)) {
        if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
          MovX = nullptr;
          break;
        }
        continue;
      }
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }
      MovX = &*I;
    }

    if (!MovX || I == E)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);

    for (unsigned I = 0; I < Size; ++I) {
      TargetInstrInfo::RegSubRegPair X1, Y1;
      X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
      Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
      BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
              TII->get(AMDGPU::V_SWAP_B32))
        .addDef(X1.Reg, 0, X1.SubReg)
        .addDef(Y1.Reg, 0, Y1.SubReg)
        .addReg(Y1.Reg, 0, Y1.SubReg)
        .addReg(X1.Reg, 0, X1.SubReg).getInstr();
    }
    MovX->eraseFromParent();
    MovY.eraseFromParent();
    MachineInstr *Next = &*std::next(MovT.getIterator());
    if (MRI.use_nodbg_empty(T))
      MovT.eraseFromParent();
    else
      Xop.setIsKill(false);

    return Next;
  }

  return nullptr;
}
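
// Illustrative note (not in the original source): Size is the operand size in
// dwords, so a 64-bit swap is emitted as one V_SWAP_B32 per 32-bit channel,
// e.g.
//
//   v_swap_b32 v0, v2
//   v_swap_b32 v1, v3
//
// to exchange v[0:1] with v[2:3].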

bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
        // If this has a literal constant source that is the same as the
        // reversed bits of an inline immediate, replace with a bitreverse of
        // that constant. This saves 4 bytes in the common case of materializing
        // sign bits.

        // Test if we are after regalloc. We only want to do this after any
        // optimizations happen because this will confuse them.
        // XXX - not exactly a check for post-regalloc run.
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
          int32_t ReverseImm;
          if (isReverseInlineImm(TII, Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
            Src.setImm(ReverseImm);
            continue;
          }
        }
      }

      if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                           MI.getOpcode() == AMDGPU::COPY)) {
        if (auto *NextMI = matchSwap(MI, MRI, TII)) {
          Next = NextMI->getIterator();
          continue;
        }
      }

      // Combine adjacent s_nops to use the immediate operand encoding how long
      // to wait.
      //
      // s_nop N
      // s_nop M
      // =>
      // s_nop (N + M)
      if (MI.getOpcode() == AMDGPU::S_NOP &&
          Next != MBB.end() &&
          (*Next).getOpcode() == AMDGPU::S_NOP) {

        MachineInstr &NextMI = *Next;
        // The instruction encodes the amount to wait with an offset of 1,
        // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
        // after adding.
        uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
        uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;

        // Make sure we don't overflow the bounds.
        if (Nop0 + Nop1 <= 8) {
          NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
          MI.eraseFromParent();
        }

        continue;
      }
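
      // Worked example (added for illustration): "s_nop 0" (wait 1 cycle)
      // followed by "s_nop 2" (wait 3 cycles) becomes a single "s_nop 3"
      // (wait 4 cycles), since (0+1) + (2+1) = 4 <= 8 fits the encoding.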

      // FIXME: We also need to consider movs of constant operands since
      // immediate operands are not folded if they have more than one use, and
      // the operand folding pass is unaware if the immediate will be free since
      // it won't know if the src == dest constraint will end up being
      // satisfied.
      if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
          MI.getOpcode() == AMDGPU::S_MUL_I32) {
        const MachineOperand *Dest = &MI.getOperand(0);
        MachineOperand *Src0 = &MI.getOperand(1);
        MachineOperand *Src1 = &MI.getOperand(2);

        if (!Src0->isReg() && Src1->isReg()) {
          if (TII->commuteInstruction(MI, false, 1, 2))
            std::swap(Src0, Src1);
        }

        // FIXME: This could work better if hints worked with subregisters. If
        // we have a vector add of a constant, we usually don't get the correct
        // allocation due to the subregister usage.
        if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
            Src0->isReg()) {
          MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
          MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
          continue;
        }

        if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
          if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
            unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
                              AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;

            MI.setDesc(TII->get(Opc));
            MI.tieOperands(0, 1);
          }
        }
      }
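
      // Example (illustrative): once regalloc satisfies the src == dest
      // constraint, "s_add_i32 s0, s0, 0x1234" shrinks to
      // "s_addk_i32 s0, 0x1234", saving the 4-byte literal.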

      // Try to use s_cmpk_*
      if (MI.isCompare() && TII->isSOPC(MI)) {
        shrinkScalarCompare(TII, MI);
        continue;
      }

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Dst = MI.getOperand(0);
        MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) {
          int32_t ReverseImm;
          if (isKImmOperand(TII, Src))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
          else if (isReverseInlineImm(TII, Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
            Src.setImm(ReverseImm);
          }
        }

        continue;
      }
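
      // Examples (illustrative): "s_mov_b32 s0, 0x1234" becomes
      // "s_movk_i32 s0, 0x1234" (the imm16 fits), while "s_mov_b32 s0,
      // 0x80000000" becomes "s_brev_b32 s0, 1" via the bit-reverse trick.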

      // Shrink scalar logic operations.
      if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
          MI.getOpcode() == AMDGPU::S_OR_B32 ||
          MI.getOpcode() == AMDGPU::S_XOR_B32) {
        if (shrinkScalarLogicOp(ST, MRI, TII, MI))
          continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      if (!TII->canShrink(MI, MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
            !TII->canShrink(MI, MRI))
          continue;
      }

      // getVOPe32 could be -1 here if we started with an instruction that had
      // a 32-bit encoding and then commuted it to an instruction that did not.
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register. We can't
          // force them to use VCC here, because this is only one register and
          // cannot deal with sequences which would require multiple copies of
          // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
          //
          // So, instead of forcing the instruction to write to VCC, we provide
          // a hint to the register allocator to use VCC and then we will run
          // this pass again after RA and shrink it if it outputs to VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
          continue;
        }
        if (DstReg != AMDGPU::VCC)
          continue;
      }
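
      // Example (illustrative): a compare writing a virtual register is only
      // hinted towards VCC here; after RA, "v_cmp_eq_u32_e64 s[0:1], v0, v1"
      // stays 64-bit, while one that landed in VCC shrinks to
      // "v_cmp_eq_u32_e32 vcc, v0, v1".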

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        unsigned SReg = Src2->getReg();
        if (TargetRegisterInfo::isVirtualRegister(SReg)) {
          MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
          continue;
        }
        if (SReg != AMDGPU::VCC)
          continue;
      }

      // Check for the bool flag output for instructions like V_ADD_I32_e64.
      const MachineOperand *SDst = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::sdst);

      // Check the carry-in operand for v_addc_u32_e64.
      const MachineOperand *Src2 = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::src2);

      if (SDst) {
        if (SDst->getReg() != AMDGPU::VCC) {
          if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
            MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
          continue;
        }

        // All of the instructions with carry outs also have an SGPR input in
        // src2.
        if (Src2 && Src2->getReg() != AMDGPU::VCC) {
          if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
            MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);

          continue;
        }
      }
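
      // Example (illustrative): "v_add_i32_e64 v0, vcc, v1, v2" can shrink to
      // the e32 encoding because its carry-out (sdst) is VCC; if sdst is any
      // other SGPR pair, the shrink is blocked and only a hint is recorded.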

      // We can shrink this instruction
      LLVM_DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
      ++NumInstructionsShrunk;

      // Copy extra operands not present in the instruction definition.
      copyExtraImplicitOps(*Inst32, MF, MI);

      MI.eraseFromParent();
      foldImmediates(*Inst32, TII, MRI);

      LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}