LLVM  9.0.0svn
SIShrinkInstructions.cpp
Go to the documentation of this file.
1 //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// The pass tries to use the 32-bit encoding for instructions when possible.
8 //===----------------------------------------------------------------------===//
9 //
10 
11 #include "AMDGPU.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIInstrInfo.h"
15 #include "llvm/ADT/Statistic.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/Support/Debug.h"
25 
26 #define DEBUG_TYPE "si-shrink-instructions"
27 
28 STATISTIC(NumInstructionsShrunk,
29  "Number of 64-bit instruction reduced to 32-bit.");
30 STATISTIC(NumLiteralConstantsFolded,
31  "Number of literal constants folded into 32-bit instructions.");
32 
33 using namespace llvm;
34 
35 namespace {
36 
37 class SIShrinkInstructions : public MachineFunctionPass {
38 public:
39  static char ID;
40 
41 public:
42  SIShrinkInstructions() : MachineFunctionPass(ID) {
43  }
44 
45  bool runOnMachineFunction(MachineFunction &MF) override;
46 
47  StringRef getPassName() const override { return "SI Shrink Instructions"; }
48 
49  void getAnalysisUsage(AnalysisUsage &AU) const override {
50  AU.setPreservesCFG();
52  }
53 };
54 
55 } // End anonymous namespace.
56 
57 INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
58  "SI Shrink Instructions", false, false)
59 
60 char SIShrinkInstructions::ID = 0;
61 
63  return new SIShrinkInstructions();
64 }
65 
66 /// This function checks \p MI for operands defined by a move immediate
67 /// instruction and then folds the literal constant into the instruction if it
68 /// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions.
70  MachineRegisterInfo &MRI, bool TryToCommute = true) {
71  assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
72 
73  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
74 
75  // Try to fold Src0
76  MachineOperand &Src0 = MI.getOperand(Src0Idx);
77  if (Src0.isReg()) {
78  unsigned Reg = Src0.getReg();
79  if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
80  MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
81  if (Def && Def->isMoveImmediate()) {
82  MachineOperand &MovSrc = Def->getOperand(1);
83  bool ConstantFolded = false;
84 
85  if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
86  isUInt<32>(MovSrc.getImm()))) {
87  // It's possible to have only one component of a super-reg defined by
88  // a single mov, so we need to clear any subregister flag.
89  Src0.setSubReg(0);
90  Src0.ChangeToImmediate(MovSrc.getImm());
91  ConstantFolded = true;
92  } else if (MovSrc.isFI()) {
93  Src0.setSubReg(0);
94  Src0.ChangeToFrameIndex(MovSrc.getIndex());
95  ConstantFolded = true;
96  }
97 
98  if (ConstantFolded) {
99  assert(MRI.use_empty(Reg));
100  Def->eraseFromParent();
101  ++NumLiteralConstantsFolded;
102  return true;
103  }
104  }
105  }
106  }
107 
108  // We have failed to fold src0, so commute the instruction and try again.
109  if (TryToCommute && MI.isCommutable()) {
110  if (TII->commuteInstruction(MI)) {
111  if (foldImmediates(MI, TII, MRI, false))
112  return true;
113 
114  // Commute back.
115  TII->commuteInstruction(MI);
116  }
117  }
118 
119  return false;
120 }
121 
122 static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
123  return isInt<16>(Src.getImm()) &&
124  !TII->isInlineConstant(*Src.getParent(),
125  Src.getParent()->getOperandNo(&Src));
126 }
127 
128 static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
129  return isUInt<16>(Src.getImm()) &&
130  !TII->isInlineConstant(*Src.getParent(),
131  Src.getParent()->getOperandNo(&Src));
132 }
133 
135  const MachineOperand &Src,
136  bool &IsUnsigned) {
137  if (isInt<16>(Src.getImm())) {
138  IsUnsigned = false;
139  return !TII->isInlineConstant(Src);
140  }
141 
142  if (isUInt<16>(Src.getImm())) {
143  IsUnsigned = true;
144  return !TII->isInlineConstant(Src);
145  }
146 
147  return false;
148 }
149 
150 /// \returns true if the constant in \p Src should be replaced with a bitreverse
151 /// of an inline immediate.
152 static bool isReverseInlineImm(const SIInstrInfo *TII,
153  const MachineOperand &Src,
154  int32_t &ReverseImm) {
155  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
156  return false;
157 
158  ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
159  return ReverseImm >= -16 && ReverseImm <= 64;
160 }
161 
162 /// Copy implicit register operands from specified instruction to this
163 /// instruction that are not part of the instruction definition.
165  const MachineInstr &MI) {
166  for (unsigned i = MI.getDesc().getNumOperands() +
167  MI.getDesc().getNumImplicitUses() +
168  MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
169  i != e; ++i) {
170  const MachineOperand &MO = MI.getOperand(i);
171  if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
172  NewMI.addOperand(MF, MO);
173  }
174 }
175 
177  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
178  // get constants on the RHS.
179  if (!MI.getOperand(0).isReg())
180  TII->commuteInstruction(MI, false, 0, 1);
181 
182  const MachineOperand &Src1 = MI.getOperand(1);
183  if (!Src1.isImm())
184  return;
185 
186  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
187  if (SOPKOpc == -1)
188  return;
189 
190  // eq/ne is special because the imm16 can be treated as signed or unsigned,
191  // and initially selectd to the unsigned versions.
192  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
193  bool HasUImm;
194  if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
195  if (!HasUImm) {
196  SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
197  AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
198  }
199 
200  MI.setDesc(TII->get(SOPKOpc));
201  }
202 
203  return;
204  }
205 
206  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
207 
208  if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
209  (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
210  MI.setDesc(NewDesc);
211  }
212 }
213 
214 /// Attempt to shink AND/OR/XOR operations requiring non-inlineable literals.
215 /// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
216 /// If the inverse of the immediate is legal, use ANDN2, ORN2 or
217 /// XNOR (as a ^ b == ~(a ^ ~b)).
218 /// \returns true if the caller should continue the machine function iterator
221  const SIInstrInfo *TII,
222  MachineInstr &MI) {
223  unsigned Opc = MI.getOpcode();
224  const MachineOperand *Dest = &MI.getOperand(0);
225  MachineOperand *Src0 = &MI.getOperand(1);
226  MachineOperand *Src1 = &MI.getOperand(2);
227  MachineOperand *SrcReg = Src0;
228  MachineOperand *SrcImm = Src1;
229 
230  if (SrcImm->isImm() &&
232  uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
233  uint32_t NewImm = 0;
234 
235  if (Opc == AMDGPU::S_AND_B32) {
236  if (isPowerOf2_32(~Imm)) {
237  NewImm = countTrailingOnes(Imm);
238  Opc = AMDGPU::S_BITSET0_B32;
239  } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
240  NewImm = ~Imm;
241  Opc = AMDGPU::S_ANDN2_B32;
242  }
243  } else if (Opc == AMDGPU::S_OR_B32) {
244  if (isPowerOf2_32(Imm)) {
245  NewImm = countTrailingZeros(Imm);
246  Opc = AMDGPU::S_BITSET1_B32;
247  } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
248  NewImm = ~Imm;
249  Opc = AMDGPU::S_ORN2_B32;
250  }
251  } else if (Opc == AMDGPU::S_XOR_B32) {
253  NewImm = ~Imm;
254  Opc = AMDGPU::S_XNOR_B32;
255  }
256  } else {
257  llvm_unreachable("unexpected opcode");
258  }
259 
260  if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
261  SrcImm == Src0) {
262  if (!TII->commuteInstruction(MI, false, 1, 2))
263  NewImm = 0;
264  }
265 
266  if (NewImm != 0) {
268  SrcReg->isReg()) {
269  MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
270  MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
271  return true;
272  }
273 
274  if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
275  MI.setDesc(TII->get(Opc));
276  if (Opc == AMDGPU::S_BITSET0_B32 ||
277  Opc == AMDGPU::S_BITSET1_B32) {
278  Src0->ChangeToImmediate(NewImm);
279  // Remove the immediate and add the tied input.
280  MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
281  MI.tieOperands(0, 2);
282  } else {
283  SrcImm->setImm(NewImm);
284  }
285  }
286  }
287  }
288 
289  return false;
290 }
291 
292 // This is the same as MachineInstr::readsRegister/modifiesRegister except
293 // it takes subregs into account.
295  unsigned Reg, unsigned SubReg,
296  const SIRegisterInfo &TRI) {
297  for (const MachineOperand &MO : R) {
298  if (!MO.isReg())
299  continue;
300 
303  if (TRI.regsOverlap(Reg, MO.getReg()))
304  return true;
305  } else if (MO.getReg() == Reg &&
307  LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
308  TRI.getSubRegIndexLaneMask(MO.getSubReg());
309  if (Overlap.any())
310  return true;
311  }
312  }
313  return false;
314 }
315 
316 static bool instReadsReg(const MachineInstr *MI,
317  unsigned Reg, unsigned SubReg,
318  const SIRegisterInfo &TRI) {
319  return instAccessReg(MI->uses(), Reg, SubReg, TRI);
320 }
321 
322 static bool instModifiesReg(const MachineInstr *MI,
323  unsigned Reg, unsigned SubReg,
324  const SIRegisterInfo &TRI) {
325  return instAccessReg(MI->defs(), Reg, SubReg, TRI);
326 }
327 
329 getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
330  const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
331  if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
333  Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
334  } else {
335  LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
337  }
338  }
339  return TargetInstrInfo::RegSubRegPair(Reg, Sub);
340 }
341 
342 // Match:
343 // mov t, x
344 // mov x, y
345 // mov y, t
346 //
347 // =>
348 //
349 // mov t, x (t is potentially dead and move eliminated)
350 // v_swap_b32 x, y
351 //
352 // Returns next valid instruction pointer if was able to create v_swap_b32.
353 //
354 // This shall not be done too early not to prevent possible folding which may
355 // remove matched moves, and this should prefereably be done before RA to
356 // release saved registers and also possibly after RA which can insert copies
357 // too.
358 //
359 // This is really just a generic peephole that is not a canocical shrinking,
360 // although requirements match the pass placement and it reduces code size too.
362  const SIInstrInfo *TII) {
363  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
364  MovT.getOpcode() == AMDGPU::COPY);
365 
366  unsigned T = MovT.getOperand(0).getReg();
367  unsigned Tsub = MovT.getOperand(0).getSubReg();
368  MachineOperand &Xop = MovT.getOperand(1);
369 
370  if (!Xop.isReg())
371  return nullptr;
372  unsigned X = Xop.getReg();
373  unsigned Xsub = Xop.getSubReg();
374 
375  unsigned Size = TII->getOpSize(MovT, 0) / 4;
376 
377  const SIRegisterInfo &TRI = TII->getRegisterInfo();
378  if (!TRI.isVGPR(MRI, X))
379  return nullptr;
380 
381  for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
382  if (YTop.getSubReg() != Tsub)
383  continue;
384 
385  MachineInstr &MovY = *YTop.getParent();
386  if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
387  MovY.getOpcode() != AMDGPU::COPY) ||
388  MovY.getOperand(1).getSubReg() != Tsub)
389  continue;
390 
391  unsigned Y = MovY.getOperand(0).getReg();
392  unsigned Ysub = MovY.getOperand(0).getSubReg();
393 
394  if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
395  continue;
396 
397  MachineInstr *MovX = nullptr;
398  auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
399  for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
400  if (instReadsReg(&*I, X, Xsub, TRI) ||
401  instModifiesReg(&*I, Y, Ysub, TRI) ||
402  instModifiesReg(&*I, T, Tsub, TRI) ||
403  (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
404  MovX = nullptr;
405  break;
406  }
407  if (!instReadsReg(&*I, Y, Ysub, TRI)) {
408  if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
409  MovX = nullptr;
410  break;
411  }
412  continue;
413  }
414  if (MovX ||
415  (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
416  I->getOpcode() != AMDGPU::COPY) ||
417  I->getOperand(0).getReg() != X ||
418  I->getOperand(0).getSubReg() != Xsub) {
419  MovX = nullptr;
420  break;
421  }
422  MovX = &*I;
423  }
424 
425  if (!MovX || I == E)
426  continue;
427 
428  LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
429 
430  for (unsigned I = 0; I < Size; ++I) {
432  X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
433  Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
434  BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
435  TII->get(AMDGPU::V_SWAP_B32))
436  .addDef(X1.Reg, 0, X1.SubReg)
437  .addDef(Y1.Reg, 0, Y1.SubReg)
438  .addReg(Y1.Reg, 0, Y1.SubReg)
439  .addReg(X1.Reg, 0, X1.SubReg).getInstr();
440  }
441  MovX->eraseFromParent();
442  MovY.eraseFromParent();
443  MachineInstr *Next = &*std::next(MovT.getIterator());
444  if (MRI.use_nodbg_empty(T))
445  MovT.eraseFromParent();
446  else
447  Xop.setIsKill(false);
448 
449  return Next;
450  }
451 
452  return nullptr;
453 }
454 
455 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
456  if (skipFunction(MF.getFunction()))
457  return false;
458 
460  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
461  const SIInstrInfo *TII = ST.getInstrInfo();
462 
463  std::vector<unsigned> I1Defs;
464 
465  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
466  BI != BE; ++BI) {
467 
468  MachineBasicBlock &MBB = *BI;
470  for (I = MBB.begin(); I != MBB.end(); I = Next) {
471  Next = std::next(I);
472  MachineInstr &MI = *I;
473 
474  if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
475  // If this has a literal constant source that is the same as the
476  // reversed bits of an inline immediate, replace with a bitreverse of
477  // that constant. This saves 4 bytes in the common case of materializing
478  // sign bits.
479 
480  // Test if we are after regalloc. We only want to do this after any
481  // optimizations happen because this will confuse them.
482  // XXX - not exactly a check for post-regalloc run.
483  MachineOperand &Src = MI.getOperand(1);
484  if (Src.isImm() &&
486  int32_t ReverseImm;
487  if (isReverseInlineImm(TII, Src, ReverseImm)) {
488  MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
489  Src.setImm(ReverseImm);
490  continue;
491  }
492  }
493  }
494 
495  if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
496  MI.getOpcode() == AMDGPU::COPY)) {
497  if (auto *NextMI = matchSwap(MI, MRI, TII)) {
498  Next = NextMI->getIterator();
499  continue;
500  }
501  }
502 
503  // Combine adjacent s_nops to use the immediate operand encoding how long
504  // to wait.
505  //
506  // s_nop N
507  // s_nop M
508  // =>
509  // s_nop (N + M)
510  if (MI.getOpcode() == AMDGPU::S_NOP &&
511  Next != MBB.end() &&
512  (*Next).getOpcode() == AMDGPU::S_NOP) {
513 
514  MachineInstr &NextMI = *Next;
515  // The instruction encodes the amount to wait with an offset of 1,
516  // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
517  // after adding.
518  uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
519  uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;
520 
521  // Make sure we don't overflow the bounds.
522  if (Nop0 + Nop1 <= 8) {
523  NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
524  MI.eraseFromParent();
525  }
526 
527  continue;
528  }
529 
530  // FIXME: We also need to consider movs of constant operands since
531  // immediate operands are not folded if they have more than one use, and
532  // the operand folding pass is unaware if the immediate will be free since
533  // it won't know if the src == dest constraint will end up being
534  // satisfied.
535  if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
536  MI.getOpcode() == AMDGPU::S_MUL_I32) {
537  const MachineOperand *Dest = &MI.getOperand(0);
538  MachineOperand *Src0 = &MI.getOperand(1);
539  MachineOperand *Src1 = &MI.getOperand(2);
540 
541  if (!Src0->isReg() && Src1->isReg()) {
542  if (TII->commuteInstruction(MI, false, 1, 2))
543  std::swap(Src0, Src1);
544  }
545 
546  // FIXME: This could work better if hints worked with subregisters. If
547  // we have a vector add of a constant, we usually don't get the correct
548  // allocation due to the subregister usage.
550  Src0->isReg()) {
551  MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
552  MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
553  continue;
554  }
555 
556  if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
557  if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
558  unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
559  AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
560 
561  MI.setDesc(TII->get(Opc));
562  MI.tieOperands(0, 1);
563  }
564  }
565  }
566 
567  // Try to use s_cmpk_*
568  if (MI.isCompare() && TII->isSOPC(MI)) {
569  shrinkScalarCompare(TII, MI);
570  continue;
571  }
572 
573  // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
574  if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
575  const MachineOperand &Dst = MI.getOperand(0);
576  MachineOperand &Src = MI.getOperand(1);
577 
578  if (Src.isImm() &&
580  int32_t ReverseImm;
581  if (isKImmOperand(TII, Src))
582  MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
583  else if (isReverseInlineImm(TII, Src, ReverseImm)) {
584  MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
585  Src.setImm(ReverseImm);
586  }
587  }
588 
589  continue;
590  }
591 
592  // Shrink scalar logic operations.
593  if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
594  MI.getOpcode() == AMDGPU::S_OR_B32 ||
595  MI.getOpcode() == AMDGPU::S_XOR_B32) {
596  if (shrinkScalarLogicOp(ST, MRI, TII, MI))
597  continue;
598  }
599 
600  if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
601  continue;
602 
603  if (!TII->canShrink(MI, MRI)) {
604  // Try commuting the instruction and see if that enables us to shrink
605  // it.
606  if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
607  !TII->canShrink(MI, MRI))
608  continue;
609  }
610 
611  // getVOPe32 could be -1 here if we started with an instruction that had
612  // a 32-bit encoding and then commuted it to an instruction that did not.
613  if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
614  continue;
615 
616  int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
617 
618  if (TII->isVOPC(Op32)) {
619  unsigned DstReg = MI.getOperand(0).getReg();
621  // VOPC instructions can only write to the VCC register. We can't
622  // force them to use VCC here, because this is only one register and
623  // cannot deal with sequences which would require multiple copies of
624  // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
625  //
626  // So, instead of forcing the instruction to write to VCC, we provide
627  // a hint to the register allocator to use VCC and then we will run
628  // this pass again after RA and shrink it if it outputs to VCC.
629  MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
630  continue;
631  }
632  if (DstReg != AMDGPU::VCC)
633  continue;
634  }
635 
636  if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
637  // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
638  // instructions.
639  const MachineOperand *Src2 =
640  TII->getNamedOperand(MI, AMDGPU::OpName::src2);
641  if (!Src2->isReg())
642  continue;
643  unsigned SReg = Src2->getReg();
645  MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
646  continue;
647  }
648  if (SReg != AMDGPU::VCC)
649  continue;
650  }
651 
652  // Check for the bool flag output for instructions like V_ADD_I32_e64.
653  const MachineOperand *SDst = TII->getNamedOperand(MI,
654  AMDGPU::OpName::sdst);
655 
656  // Check the carry-in operand for v_addc_u32_e64.
657  const MachineOperand *Src2 = TII->getNamedOperand(MI,
658  AMDGPU::OpName::src2);
659 
660  if (SDst) {
661  if (SDst->getReg() != AMDGPU::VCC) {
663  MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
664  continue;
665  }
666 
667  // All of the instructions with carry outs also have an SGPR input in
668  // src2.
669  if (Src2 && Src2->getReg() != AMDGPU::VCC) {
671  MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
672 
673  continue;
674  }
675  }
676 
677  // We can shrink this instruction
678  LLVM_DEBUG(dbgs() << "Shrinking " << MI);
679 
680  MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
681  ++NumInstructionsShrunk;
682 
683  // Copy extra operands not present in the instruction definition.
684  copyExtraImplicitOps(*Inst32, MF, MI);
685 
686  MI.eraseFromParent();
687  foldImmediates(*Inst32, TII, MRI);
688 
689  LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
690  }
691  }
692  return false;
693 }
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
unsigned getNumImplicitUses() const
Return the number of implicit uses this instruction has.
Definition: MCInstrDesc.h:526
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:348
static bool isReverseInlineImm(const SIInstrInfo *TII, const MachineOperand &Src, int32_t &ReverseImm)
static unsigned getSubRegFromChannel(unsigned Channel)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
AMDGPU specific subclass of TargetSubtarget.
instr_iterator instr_end()
unsigned getNumImplicitDefs() const
Return the number of implicit defs this instruction has.
Definition: MCInstrDesc.h:548
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:491
static bool sopkIsZext(const MachineInstr &MI)
Definition: SIInstrInfo.h:566
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:382
iterator_range< use_nodbg_iterator > use_nodbg_operands(unsigned Reg) const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:508
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
unsigned getSubReg() const
const SIInstrInfo * getInstrInfo() const override
static bool instModifiesReg(const MachineInstr *MI, unsigned Reg, unsigned SubReg, const SIRegisterInfo &TRI)
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:305
unsigned const TargetRegisterInfo * TRI
bool isInlineConstant(const APInt &Imm) const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:165
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
Definition: MachineInstr.h:699
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
const HexagonInstrInfo * TII
static TargetInstrInfo::RegSubRegPair getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI)
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:411
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:477
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned SubReg
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
static bool isKImmOrKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src, bool &IsUnsigned)
static bool instAccessReg(iterator_range< MachineInstr::const_mop_iterator > &&R, unsigned Reg, unsigned SubReg, const SIRegisterInfo &TRI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
Definition: SIInstrInfo.h:745
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:405
STATISTIC(NumInstructionsShrunk, "Number of 64-bit instruction reduced to 32-bit.")
#define DEBUG_TYPE
The pass tries to use the 32-bit encoding for instructions when possible.
static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src)
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
TargetInstrInfo::RegSubRegPair RegSubRegPair
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:119
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
Definition: MachineInstr.h:693
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool hasSwap() const
Represent the analysis usage information of a pass.
LLVM_READONLY int getSOPKOp(uint16_t Opcode)
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:480
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
self_iterator getIterator()
Definition: ilist_node.h:81
constexpr Type getAsInteger() const
Definition: LaneBitmask.h:73
static bool isVOP2(const MachineInstr &MI)
Definition: SIInstrInfo.h:390
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setIsKill(bool Val=true)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:33
static bool instReadsReg(const MachineInstr *MI, unsigned Reg, unsigned SubReg, const SIRegisterInfo &TRI)
Iterator for intrusive lists based on ilist_node.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:308
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_READONLY int getVOPe32(uint16_t Opcode)
MachineOperand class - Representation of each machine instruction operand.
void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register...
A pair composed of a register and a sub-register index.
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src)
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly. ...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
int64_t getImm() const
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
A range adaptor for a pair of iterators.
static MachineInstr * matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, const SIInstrInfo *TII)
bool use_empty(unsigned RegNo) const
use_empty - Return true if there are no instructions using the specified register.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction that are not part of t...
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
FunctionPass * createSIShrinkInstructionsPass()
bool hasOneUse(unsigned RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool isVOPC(const MachineInstr &MI)
Definition: SIInstrInfo.h:414
#define I(x, y, z)
Definition: MD5.cpp:58
constexpr bool any() const
Definition: LaneBitmask.h:52
void setSubReg(unsigned subReg)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
uint32_t Size
Definition: Profile.cpp:46
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:345
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool shrinkScalarLogicOp(const GCNSubtarget &ST, MachineRegisterInfo &MRI, const SIInstrInfo *TII, MachineInstr &MI)
Attempt to shink AND/OR/XOR operations requiring non-inlineable literals.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
const MachineInstrBuilder & addDef(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
static bool isVOP1(const MachineInstr &MI)
Definition: SIInstrInfo.h:382
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MachineInstr.h:847
static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineRegisterInfo &MRI, bool TryToCommute=true)
This function checks MI for operands defined by a move immediate instruction and then folds the liter...
bool isImplicit() const
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.