LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIShrinkInstructions.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2017-09-14 15:23:50
                 Hit    Total    Coverage
Lines:           216      221      97.7 %
Functions:        17       18      94.4 %

          Line data    Source code
       1             : //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : /// The pass tries to use the 32-bit encoding for instructions when possible.
       9             : //===----------------------------------------------------------------------===//
      10             : //
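                     : //
                     : // For example, when no operand modifiers or extra operands are required,
                     : // the 8-byte VOP3 encoding
                     : //   v_add_f32_e64 v0, v1, v2
                     : // can be replaced with the 4-byte VOP2 encoding
                     : //   v_add_f32_e32 v0, v1, v2
                     : // saving 4 bytes of code size per shrunk instruction.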
      11             : 
      12             : #include "AMDGPU.h"
      13             : #include "AMDGPUMCInstLower.h"
      14             : #include "AMDGPUSubtarget.h"
      15             : #include "SIInstrInfo.h"
      16             : #include "llvm/ADT/Statistic.h"
      17             : #include "llvm/CodeGen/MachineFunctionPass.h"
      18             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      19             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      20             : #include "llvm/IR/Constants.h"
      21             : #include "llvm/IR/Function.h"
      22             : #include "llvm/IR/LLVMContext.h"
      23             : #include "llvm/Support/Debug.h"
      24             : #include "llvm/Support/raw_ostream.h"
      25             : #include "llvm/Target/TargetMachine.h"
      26             : 
      27             : #define DEBUG_TYPE "si-shrink-instructions"
      28             : 
      29             : STATISTIC(NumInstructionsShrunk,
      30             :           "Number of 64-bit instructions reduced to 32-bit.");
      31             : STATISTIC(NumLiteralConstantsFolded,
      32             :           "Number of literal constants folded into 32-bit instructions.");
      33             : 
      34             : using namespace llvm;
      35             : 
      36             : namespace {
      37             : 
      38        2881 : class SIShrinkInstructions : public MachineFunctionPass {
      39             : public:
      40             :   static char ID;
      41             : 
      42             : public:
      43        2897 :   SIShrinkInstructions() : MachineFunctionPass(ID) {
      44             :   }
      45             : 
      46             :   bool runOnMachineFunction(MachineFunction &MF) override;
      47             : 
      48        2885 :   StringRef getPassName() const override { return "SI Shrink Instructions"; }
      49             : 
      50        2885 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      51        2885 :     AU.setPreservesCFG();
      52        2885 :     MachineFunctionPass::getAnalysisUsage(AU);
      53        2885 :   }
      54             : };
      55             : 
      56             : } // End anonymous namespace.
      57             : 
      58      312538 : INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
      59             :                 "SI Shrink Instructions", false, false)
      60             : 
      61             : char SIShrinkInstructions::ID = 0;
      62             : 
      63        2892 : FunctionPass *llvm::createSIShrinkInstructionsPass() {
      64        5784 :   return new SIShrinkInstructions();
      65             : }
      66             : 
      67       54252 : static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
      68             :                    const MachineRegisterInfo &MRI) {
      69       54252 :   if (!MO->isReg())
      70             :     return false;
      71             : 
      72       90926 :   if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
      73       67480 :     return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
      74             : 
      75       11723 :   return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
      76             : }
      77             : 
      78       57022 : static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
      79             :                       const SIRegisterInfo &TRI,
      80             :                       const MachineRegisterInfo &MRI) {
      81             : 
      82       57022 :   const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      83             :   // Can't shrink instruction with three operands.
      84             :   // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
      85             :   // a special case for it.  It can only be shrunk if the third operand
      86             : // is vcc.  We should handle this the same way we handle VOPC, by adding
      87             : // a register allocation hint pre-regalloc and then doing the shrinking
      88             :   // post-regalloc.
      89       57022 :   if (Src2) {
      90       28272 :     switch (MI.getOpcode()) {
      91             :       default: return false;
      92             : 
      93          33 :       case AMDGPU::V_ADDC_U32_e64:
      94             :       case AMDGPU::V_SUBB_U32_e64:
      95          66 :         if (TII->getNamedOperand(MI, AMDGPU::OpName::src1)->isImm())
      96             :           return false;
      97             :         // Additional verification is needed for sdst/src2.
      98          21 :         return true;
      99             : 
     100        1036 :       case AMDGPU::V_MAC_F32_e64:
     101             :       case AMDGPU::V_MAC_F16_e64:
     102        2072 :         if (!isVGPR(Src2, TRI, MRI) ||
     103        1036 :             TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
     104             :           return false;
     105             :         break;
     106             : 
     107             :       case AMDGPU::V_CNDMASK_B32_e64:
     108             :         break;
     109             :     }
     110             :   }
     111             : 
     112       56981 :   const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
     113       94523 :   if (Src1 && (!isVGPR(Src1, TRI, MRI) ||
     114       37542 :                TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
     115             :     return false;
     116             : 
     117             :   // We don't need to check src0, all input types are legal, so just make sure
     118             :   // src0 isn't using any modifiers.
     119       40319 :   if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
     120             :     return false;
     121             : 
     122             :   // Check output modifiers
     123       79222 :   return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
     124       39593 :          !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
     125             : }
     126             : 
     127             : /// \brief This function checks \p MI for operands defined by a move immediate
     128             : /// instruction and then folds the literal constant into the instruction if it
     129             : /// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
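                     : ///
                     : /// For example (an illustrative sketch):
                     : ///   v_mov_b32_e32 v1, 0x4d2
                     : ///   v_add_f32_e32 v0, v1, v2
                     : /// becomes, provided v1 has no other uses:
                     : ///   v_add_f32_e32 v0, 0x4d2, v2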
     130       36052 : static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
     131             :                            MachineRegisterInfo &MRI, bool TryToCommute = true) {
     132             :   assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
     133             : 
     134       72104 :   int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
     135             : 
     136             :   // Try to fold Src0
     137       72104 :   MachineOperand &Src0 = MI.getOperand(Src0Idx);
     138       36052 :   if (Src0.isReg()) {
     139       27414 :     unsigned Reg = Src0.getReg();
     140       27414 :     if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
     141       11744 :       MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
     142       23488 :       if (Def && Def->isMoveImmediate()) {
     143         706 :         MachineOperand &MovSrc = Def->getOperand(1);
     144         706 :         bool ConstantFolded = false;
     145             : 
     146        1363 :         if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
     147           4 :                                isUInt<32>(MovSrc.getImm()))) {
     148             :           // It's possible to have only one component of a super-reg defined by
     149             :           // a single mov, so we need to clear any subregister flag.
     150         653 :           Src0.setSubReg(0);
     151         653 :           Src0.ChangeToImmediate(MovSrc.getImm());
     152         653 :           ConstantFolded = true;
     153          53 :         } else if (MovSrc.isFI()) {
     154          53 :           Src0.setSubReg(0);
     155          53 :           Src0.ChangeToFrameIndex(MovSrc.getIndex());
     156          53 :           ConstantFolded = true;
     157             :         }
     158             : 
     159             :         if (ConstantFolded) {
     160             :           assert(MRI.use_empty(Reg));
     161         706 :           Def->eraseFromParent();
     162         706 :           ++NumLiteralConstantsFolded;
     163         706 :           return true;
     164             :         }
     165             :       }
     166             :     }
     167             :   }
     168             : 
     169             :   // We have failed to fold src0, so commute the instruction and try again.
     170       63295 :   if (TryToCommute && MI.isCommutable()) {
     171       18845 :     if (TII->commuteInstruction(MI)) {
     172        7614 :       if (foldImmediates(MI, TII, MRI, false))
     173             :         return true;
     174             : 
     175             :       // Commute back.
     176        7397 :       TII->commuteInstruction(MI);
     177             :     }
     178             :   }
     179             : 
     180             :   return false;
     181             : }
     182             : 
     183             : // Copy the undef and kill flags from \p Orig onto the implicit VCC use of \p MI.
     184        4133 : static void copyFlagsToImplicitVCC(MachineInstr &MI,
     185             :                                    const MachineOperand &Orig) {
     186             : 
     187        4143 :   for (MachineOperand &Use : MI.implicit_operands()) {
     188        4143 :     if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
     189        8266 :       Use.setIsUndef(Orig.isUndef());
     190        4133 :       Use.setIsKill(Orig.isKill());
     191             :       return;
     192             :     }
     193             :   }
     194             : }
     195             : 
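                     : /// \returns true if \p Src is an immediate that fits in a signed 16-bit
                     : /// field but is not already free as an inline constant, i.e. it would
                     : /// benefit from a SOPK-style simm16 encoding.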
     196       24420 : static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
     197       62803 :   return isInt<16>(Src.getImm()) &&
     198       41889 :     !TII->isInlineConstant(*Src.getParent(),
     199       24420 :                            Src.getParent()->getOperandNo(&Src));
     200             : }
     201             : 
     202          32 : static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
     203          52 :   return isUInt<16>(Src.getImm()) &&
     204          60 :     !TII->isInlineConstant(*Src.getParent(),
     205          32 :                            Src.getParent()->getOperandNo(&Src));
     206             : }
     207             : 
     208         302 : static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
     209             :                                  const MachineOperand &Src,
     210             :                                  bool &IsUnsigned) {
     211         604 :   if (isInt<16>(Src.getImm())) {
     212         286 :     IsUnsigned = false;
     213         286 :     return !TII->isInlineConstant(Src);
     214             :   }
     215             : 
     216          16 :   if (isUInt<16>(Src.getImm())) {
     217           8 :     IsUnsigned = true;
     218           8 :     return !TII->isInlineConstant(Src);
     219             :   }
     220             : 
     221             :   return false;
     222             : }
     223             : 
     224             : /// \returns true if the constant in \p Src should be replaced with a bitreverse
     225             : /// of an inline immediate.
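                     : ///
                     : /// For example, 0x80000000 is not an inline immediate, but its bit
                     : /// reverse, 1, is; so
                     : ///   v_mov_b32_e32 v0, 0x80000000   ; needs a 32-bit literal
                     : /// can be rewritten as
                     : ///   v_bfrev_b32_e32 v0, 1          ; inline immediate, 4 bytes shorter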
     226       32561 : static bool isReverseInlineImm(const SIInstrInfo *TII,
     227             :                                const MachineOperand &Src,
     228             :                                int32_t &ReverseImm) {
     229       65122 :   if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
     230             :     return false;
     231             : 
     232       24734 :   ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
     233       12367 :   return ReverseImm >= -16 && ReverseImm <= 64;
     234             : }
     235             : 
     236             : /// Copy implicit register operands from specified instruction to this
     237             : /// instruction that are not part of the instruction definition.
     238       28438 : static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
     239             :                                  const MachineInstr &MI) {
     240       56876 :   for (unsigned i = MI.getDesc().getNumOperands() +
     241       56876 :          MI.getDesc().getNumImplicitUses() +
     242       56876 :          MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
     243       28438 :        i != e; ++i) {
     244           0 :     const MachineOperand &MO = MI.getOperand(i);
     245           0 :     if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
     246           0 :       NewMI.addOperand(MF, MO);
     247             :   }
     248       28438 : }
     249             : 
     250         471 : static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
     251             :   // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
     252             :   // get constants on the RHS.
     253         942 :   if (!MI.getOperand(0).isReg())
     254          20 :     TII->commuteInstruction(MI, false, 0, 1);
     255             : 
     256         942 :   const MachineOperand &Src1 = MI.getOperand(1);
     257         471 :   if (!Src1.isImm())
     258             :     return;
     259             : 
     260         856 :   int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
     261         428 :   if (SOPKOpc == -1)
     262             :     return;
     263             : 
     264             :   // eq/ne is special because the imm16 can be treated as signed or unsigned,
     265             :   // and is initially selected as the unsigned version.
     266         414 :   if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
     267             :     bool HasUImm;
     268         302 :     if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
     269          26 :       if (!HasUImm) {
     270          18 :         SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
     271             :           AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
     272             :       }
     273             : 
     274          52 :       MI.setDesc(TII->get(SOPKOpc));
     275             :     }
     276             : 
     277             :     return;
     278             :   }
     279             : 
     280         224 :   const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
     281             : 
     282         320 :   if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
     283         272 :       (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
     284          36 :     MI.setDesc(NewDesc);
     285             :   }
     286             : }
     287             : 
     288       29513 : bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
     289       29513 :   if (skipFunction(*MF.getFunction()))
     290             :     return false;
     291             : 
     292       29510 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     293       29510 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     294       29510 :   const SIInstrInfo *TII = ST.getInstrInfo();
     295       29510 :   const SIRegisterInfo &TRI = TII->getRegisterInfo();
     296             : 
     297       29510 :   std::vector<unsigned> I1Defs;
     298             : 
     299       59020 :   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
     300       62811 :                                                   BI != BE; ++BI) {
     301             : 
     302       33301 :     MachineBasicBlock &MBB = *BI;
     303       66602 :     MachineBasicBlock::iterator I, Next;
     304     1380433 :     for (I = MBB.begin(); I != MBB.end(); I = Next) {
     305      640265 :       Next = std::next(I);
     306      640265 :       MachineInstr &MI = *I;
     307             : 
     308      640265 :       if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
     309             :         // If this has a literal constant source that is the same as the
     310             :         // reversed bits of an inline immediate, replace with a bitreverse of
     311             :         // that constant. This saves 4 bytes in the common case of materializing
     312             :         // sign bits.
     313             : 
     314             :         // Test if we are after regalloc. We only want to do this after any
     315             :         // optimizations happen because this will confuse them.
     316             :         // XXX - not exactly a check for post-regalloc run.
     317       98990 :         MachineOperand &Src = MI.getOperand(1);
     318       66124 :         if (Src.isImm() &&
     319       33258 :             TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
     320             :           int32_t ReverseImm;
     321        9677 :           if (isReverseInlineImm(TII, Src, ReverseImm)) {
     322         348 :             MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
     323         232 :             Src.setImm(ReverseImm);
     324         116 :             continue;
     325             :           }
     326             :         }
     327             :       }
     328             : 
     329             :       // Combine adjacent s_nops to use the immediate operand encoding how long
     330             :       // to wait.
     331             :       //
     332             :       // s_nop N
     333             :       // s_nop M
     334             :       //  =>
     335             :       // s_nop (N + M)
     336        1675 :       if (MI.getOpcode() == AMDGPU::S_NOP &&
     337      645174 :           Next != MBB.end() &&
     338        1675 :           (*Next).getOpcode() == AMDGPU::S_NOP) {
     339             : 
     340          75 :         MachineInstr &NextMI = *Next;
     341             :         // The instruction encodes the amount to wait with an offset of 1,
     342             :         // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
     343             :         // after adding.
     344          75 :         uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
     345          75 :         uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;
     346             : 
     347             :         // Make sure we don't overflow the bounds.
     348          75 :         if (Nop0 + Nop1 <= 8) {
     349         150 :           NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
     350          75 :           MI.eraseFromParent();
     351             :         }
     352             : 
     353          75 :         continue;
     354             :       }
     355             : 
     356             :       // FIXME: We also need to consider movs of constant operands since
     357             :       // immediate operands are not folded if they have more than one use, and
     358             :       // the operand folding pass is unaware if the immediate will be free since
     359             :       // it won't know if the src == dest constraint will end up being
     360             :       // satisfied.
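                     :       //
                     :       // When dst and src0 are the same register and the immediate fits in
                     :       // 16 signed bits, use the SOPK form instead, e.g.
                     :       //   s_add_i32 s0, s0, 0x1234  ->  s_addk_i32 s0, 0x1234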
     361     1278066 :       if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
     362             :           MI.getOpcode() == AMDGPU::S_MUL_I32) {
     363        2220 :         const MachineOperand *Dest = &MI.getOperand(0);
     364        4440 :         MachineOperand *Src0 = &MI.getOperand(1);
     365        4440 :         MachineOperand *Src1 = &MI.getOperand(2);
     366             : 
     367        4444 :         if (!Src0->isReg() && Src1->isReg()) {
     368           4 :           if (TII->commuteInstruction(MI, false, 1, 2))
     369             :             std::swap(Src0, Src1);
     370             :         }
     371             : 
     372             :         // FIXME: This could work better if hints worked with subregisters. If
     373             :         // we have a vector add of a constant, we usually don't get the correct
     374             :         // allocation due to the subregister usage.
     375        6598 :         if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
     376        2162 :             Src0->isReg()) {
     377        2154 :           MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
     378        2154 :           MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
     379        1077 :           continue;
     380             :         }
     381             : 
     382        3425 :         if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
     383         894 :           if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
     384          71 :             unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
     385          71 :               AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
     386             : 
     387         213 :             MI.setDesc(TII->get(Opc));
     388          71 :             MI.tieOperands(0, 1);
     389             :           }
     390             :         }
     391             :       }
     392             : 
     393             :       // Try to use s_cmpk_*
     394      650094 :       if (MI.isCompare() && TII->isSOPC(MI)) {
     395         471 :         shrinkScalarCompare(TII, MI);
     396         471 :         continue;
     397             :       }
     398             : 
     399             :       // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
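                     :       //
                     :       //   s_mov_b32 s0, 0x1234    ; 8 bytes (32-bit literal)
                     :       //     ->
                     :       //   s_movk_i32 s0, 0x1234   ; 4 bytes (simm16 in the encoding)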
     400      638526 :       if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
     401       56182 :         const MachineOperand &Dst = MI.getOperand(0);
     402      112364 :         MachineOperand &Src = MI.getOperand(1);
     403             : 
     404      100811 :         if (Src.isImm() &&
     405       89258 :             TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) {
     406             :           int32_t ReverseImm;
     407       24130 :           if (isKImmOperand(TII, Src))
     408        2260 :             MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
     409       23000 :           else if (isReverseInlineImm(TII, Src, ReverseImm)) {
     410         258 :             MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
     411          86 :             Src.setImm(ReverseImm);
     412             :           }
     413             :         }
     414             : 
     415       56182 :         continue;
     416             :       }
     417             : 
     418      582344 :       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
     419      535512 :         continue;
     420             : 
     421       46832 :       if (!canShrink(MI, TII, TRI, MRI)) {
     422             :         // Try commuting the instruction and see if that enables us to shrink
     423             :         // it.
     424       24782 :         if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
     425       10190 :             !canShrink(MI, TII, TRI, MRI))
     426        7580 :           continue;
     427             :       }
     428             : 
     429             :       // getVOPe32 could be -1 here if we started with an instruction that had
     430             :       // a 32-bit encoding and then commuted it to an instruction that did not.
     431       39252 :       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
     432           0 :         continue;
     433             : 
     434       39252 :       int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
     435             : 
     436       78504 :       if (TII->isVOPC(Op32)) {
     437        8064 :         unsigned DstReg = MI.getOperand(0).getReg();
     438       12074 :         if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
     439             :           // VOPC instructions can only write to the VCC register. We can't
     440             :           // force them to use VCC here, because this is only one register and
     441             :           // cannot deal with sequences which would require multiple copies of
     442             :           // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
     443             :           //
     444             :           // So, instead of forcing the instruction to write to VCC, we provide
     445             :           // a hint to the register allocator to use VCC and then we we will run
     446             :           // this pass again after RA and shrink it if it outputs to VCC.
     447        8020 :           MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
     448        4010 :           continue;
     449             :         }
     450        4054 :         if (DstReg != AMDGPU::VCC)
     451         720 :           continue;
     452             :       }
     453             : 
     454       34522 :       if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
     455             :         // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
     456             :         // instructions.
     457       10160 :         const MachineOperand *Src2 =
     458             :             TII->getNamedOperand(MI, AMDGPU::OpName::src2);
     459       10160 :         if (!Src2->isReg())
     460           0 :           continue;
     461       10160 :         unsigned SReg = Src2->getReg();
     462       15221 :         if (TargetRegisterInfo::isVirtualRegister(SReg)) {
     463        5061 :           MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
     464        5061 :           continue;
     465             :         }
     466        5099 :         if (SReg != AMDGPU::VCC)
     467         976 :           continue;
     468             :       }
     469             : 
     470             :       // Check for the bool flag output for instructions like V_ADD_I32_e64.
     471       28485 :       const MachineOperand *SDst = TII->getNamedOperand(MI,
     472             :                                                         AMDGPU::OpName::sdst);
     473             : 
     474             :       // Check the carry-in operand for v_addc_u32_e64.
     475       28485 :       const MachineOperand *Src2 = TII->getNamedOperand(MI,
     476             :                                                         AMDGPU::OpName::src2);
     477             : 
     478       28485 :       if (SDst) {
     479        3456 :         if (SDst->getReg() != AMDGPU::VCC) {
     480          90 :           if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
     481          24 :             MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
     482          45 :           continue;
     483             :         }
     484             : 
     485             :         // All of the instructions with carry outs also have an SGPR input in
     486             :         // src2.
     487        3368 :         if (Src2 && Src2->getReg() != AMDGPU::VCC) {
     488           4 :           if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
     489           1 :             MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
     490             : 
     491           2 :           continue;
     492             :         }
     493             :       }
     494             : 
     495             :       // We can shrink this instruction
     496             :       DEBUG(dbgs() << "Shrinking " << MI);
     497             : 
     498             :       MachineInstrBuilder Inst32 =
     499       85314 :           BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
     500             : 
     501             :       // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
     502             :       // For VOPC instructions, this is replaced by an implicit def of vcc.
     503       28438 :       int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
     504       28438 :       if (Op32DstIdx != -1) {
     505             :         // dst
     506       25104 :         Inst32.add(MI.getOperand(0));
     507             :       } else {
     508             :         assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
     509             :                "Unexpected case");
     510             :       }
     511             : 
     512             : 
     513       56876 :       Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
     514             : 
     515       28438 :       const MachineOperand *Src1 =
     516             :           TII->getNamedOperand(MI, AMDGPU::OpName::src1);
     517       28438 :       if (Src1)
     518             :         Inst32.add(*Src1);
     519             : 
     520       28438 :       if (Src2) {
     521        4953 :         int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
     522        4953 :         if (Op32Src2Idx != -1) {
     523             :           Inst32.add(*Src2);
     524             :         } else {
     525             :           // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
     526             :           // replaced with an implicit read of vcc. This was already added
     527             :           // during the initial BuildMI, so find it to preserve the flags.
     528        4133 :           copyFlagsToImplicitVCC(*Inst32, *Src2);
     529             :         }
     530             :       }
     531             : 
     532       28438 :       ++NumInstructionsShrunk;
     533             : 
     534             :       // Copy extra operands not present in the instruction definition.
     535       28438 :       copyExtraImplicitOps(*Inst32, MF, MI);
     536             : 
     537       28438 :       MI.eraseFromParent();
     538       28438 :       foldImmediates(*Inst32, TII, MRI);
     539             : 
     540             :       DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
     541             : 
     542             : 
     543             :     }
     544             :   }
     545             :   return false;
     546             : }

Generated by: LCOV version 1.13