LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIShrinkInstructions.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2018-07-13 00:08:38

                   Hit    Total    Coverage
Lines:             187      192      97.4 %
Functions:          17       18      94.4 %

          Line data    Source code
       1             : //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : /// The pass tries to use the 32-bit encoding for instructions when possible.
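                     : ///
                     : /// For example (illustrative), a VOP3-encoded add with no modifiers,
                     : ///   v_add_f32_e64 v0, v1, v2   ; 8-byte encoding
                     : /// can instead be emitted in its VOP2 form,
                     : ///   v_add_f32_e32 v0, v1, v2   ; 4-byte encoding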
       9             : //===----------------------------------------------------------------------===//
      10             : //
      11             : 
      12             : #include "AMDGPU.h"
      13             : #include "AMDGPUSubtarget.h"
      14             : #include "SIInstrInfo.h"
      15             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      16             : #include "llvm/ADT/Statistic.h"
      17             : #include "llvm/CodeGen/MachineFunctionPass.h"
      18             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      19             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      20             : #include "llvm/IR/Constants.h"
      21             : #include "llvm/IR/Function.h"
      22             : #include "llvm/IR/LLVMContext.h"
      23             : #include "llvm/Support/Debug.h"
      24             : #include "llvm/Support/raw_ostream.h"
      25             : #include "llvm/Target/TargetMachine.h"
      26             : 
      27             : #define DEBUG_TYPE "si-shrink-instructions"
      28             : 
      29             : STATISTIC(NumInstructionsShrunk,
      30             :           "Number of 64-bit instructions reduced to 32-bit.");
      31             : STATISTIC(NumLiteralConstantsFolded,
      32             :           "Number of literal constants folded into 32-bit instructions.");
      33             : 
      34             : using namespace llvm;
      35             : 
      36             : namespace {
      37             : 
      38        3535 : class SIShrinkInstructions : public MachineFunctionPass {
      39             : public:
      40             :   static char ID;
      41             : 
      42             : public:
      43        3551 :   SIShrinkInstructions() : MachineFunctionPass(ID) {
      44             :   }
      45             : 
      46             :   bool runOnMachineFunction(MachineFunction &MF) override;
      47             : 
      48        3530 :   StringRef getPassName() const override { return "SI Shrink Instructions"; }
      49             : 
      50        3530 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      51        3530 :     AU.setPreservesCFG();
      52        3530 :     MachineFunctionPass::getAnalysisUsage(AU);
      53        3530 :   }
      54             : };
      55             : 
      56             : } // End anonymous namespace.
      57             : 
      58      342570 : INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
      59             :                 "SI Shrink Instructions", false, false)
      60             : 
      61             : char SIShrinkInstructions::ID = 0;
      62             : 
      63        3546 : FunctionPass *llvm::createSIShrinkInstructionsPass() {
      64        7092 :   return new SIShrinkInstructions();
      65             : }
      66             : 
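                     : /// \returns true if \p MO is a register operand whose register class
                     : /// contains VGPRs (checked via the virtual or physical register class).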
      67       76358 : static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
      68             :                    const MachineRegisterInfo &MRI) {
      69       76358 :   if (!MO->isReg())
      70             :     return false;
      71             : 
      72      125950 :   if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
      73       44949 :     return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
      74             : 
      75       18026 :   return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
      76             : }
      77             : 
      78       80169 : static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
      79             :                       const SIRegisterInfo &TRI,
      80             :                       const MachineRegisterInfo &MRI) {
      81             : 
      82       80169 :   const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      83             :   // Can't shrink instructions with three operands.
      84             :   // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
      85             :   // a special case for it.  It can only be shrunk if the third operand
      86             :   // is vcc.  We should handle this the same way we handle vopc, by adding
      87             :   // a register allocation hint pre-regalloc and then do the shrinking
      88             :   // post-regalloc.
      89       80169 :   if (Src2) {
      90       45964 :     switch (MI.getOpcode()) {
      91             :       default: return false;
      92             : 
      93       13068 :       case AMDGPU::V_ADDC_U32_e64:
      94             :       case AMDGPU::V_SUBB_U32_e64:
      95             :       case AMDGPU::V_SUBBREV_U32_e64:
      96       13068 :         if (!isVGPR(TII->getNamedOperand(MI, AMDGPU::OpName::src1), TRI, MRI))
      97             :           return false;
      98             :         // Additional verification is needed for sdst/src2.
      99        9185 :         return true;
     100             : 
     101        1519 :       case AMDGPU::V_MAC_F32_e64:
     102             :       case AMDGPU::V_MAC_F16_e64:
     103             :       case AMDGPU::V_FMAC_F32_e64:
     104        3038 :         if (!isVGPR(Src2, TRI, MRI) ||
     105        1519 :             TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
     106             :           return false;
     107             :         break;
     108             : 
     109             :       case AMDGPU::V_CNDMASK_B32_e64:
     110             :         break;
     111             :     }
     112             :   }
     113             : 
     114       67101 :   const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
     115      110722 :   if (Src1 && (!isVGPR(Src1, TRI, MRI) ||
     116       43621 :                TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
     117             :     return false;
     118             : 
     119             :   // We don't need to check src0; all input types are legal there, so just
     120             :   // make sure src0 isn't using any modifiers.
     121       47930 :   if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
     122             :     return false;
     123             : 
     124             :   // Check output modifiers
     125       94050 :   return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
     126       47007 :          !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
     127             : }
     128             : 
     129             : /// This function checks \p MI for operands defined by a move immediate
     130             : /// instruction and then folds the literal constant into the instruction if it
     131             : /// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
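                     : ///
                     : /// For example (illustrative), given
                     : ///   v_mov_b32 v1, 0x12345678
                     : ///   v_add_f32_e32 v0, v1, v2
                     : /// the literal is folded into src0 when v1 has no other uses:
                     : ///   v_add_f32_e32 v0, 0x12345678, v2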
     132       45904 : static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
     133             :                            MachineRegisterInfo &MRI, bool TryToCommute = true) {
     134             :   assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
     135             : 
     136       91808 :   int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
     137             : 
     138             :   // Try to fold Src0
     139       45904 :   MachineOperand &Src0 = MI.getOperand(Src0Idx);
     140       45904 :   if (Src0.isReg()) {
     141       29934 :     unsigned Reg = Src0.getReg();
     142       29934 :     if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
     143       11833 :       MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
     144       23666 :       if (Def && Def->isMoveImmediate()) {
     145        1028 :         MachineOperand &MovSrc = Def->getOperand(1);
     146             :         bool ConstantFolded = false;
     147             : 
     148        2005 :         if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
     149           4 :                                isUInt<32>(MovSrc.getImm()))) {
     150             :           // It's possible to have only one component of a super-reg defined by
     151             :           // a single mov, so we need to clear any subregister flag.
     152             :           Src0.setSubReg(0);
     153         973 :           Src0.ChangeToImmediate(MovSrc.getImm());
     154             :           ConstantFolded = true;
     155          55 :         } else if (MovSrc.isFI()) {
     156             :           Src0.setSubReg(0);
     157          55 :           Src0.ChangeToFrameIndex(MovSrc.getIndex());
     158             :           ConstantFolded = true;
     159             :         }
     160             : 
     161             :         if (ConstantFolded) {
     162             :           assert(MRI.use_empty(Reg));
     163        1028 :           Def->eraseFromParent();
     164             :           ++NumLiteralConstantsFolded;
     165        1028 :           return true;
     166             :         }
     167             :       }
     168             :     }
     169             :   }
     170             : 
     171             :   // We have failed to fold src0, so commute the instruction and try again.
     172       82759 :   if (TryToCommute && MI.isCommutable()) {
     173       28552 :     if (TII->commuteInstruction(MI)) {
     174        7277 :       if (foldImmediates(MI, TII, MRI, false))
     175             :         return true;
     176             : 
     177             :       // Commute back.
     178        6993 :       TII->commuteInstruction(MI);
     179             :     }
     180             :   }
     181             : 
     182             :   return false;
     183             : }
     184             : 
     185             : // Copy the kill and undef flags from \p Orig onto MI's implicit VCC use.
     186        6401 : static void copyFlagsToImplicitVCC(MachineInstr &MI,
     187             :                                    const MachineOperand &Orig) {
     188             : 
     189       15043 :   for (MachineOperand &Use : MI.implicit_operands()) {
     190       10722 :     if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
     191             :       Use.setIsUndef(Orig.isUndef());
     192             :       Use.setIsKill(Orig.isKill());
     193             :       return;
     194             :     }
     195             :   }
     196             : }
     197             : 
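                     : /// \returns true if \p Src is a signed 16-bit immediate that is not already
                     : /// free as an inline constant, making it a candidate for a simm16 (SOPK)
                     : /// encoding.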
     198       25621 : static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
     199       66095 :   return isInt<16>(Src.getImm()) &&
     200       14853 :     !TII->isInlineConstant(*Src.getParent(),
     201       25621 :                            Src.getParent()->getOperandNo(&Src));
     202             : }
     203             : 
     204          36 : static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
     205          56 :   return isUInt<16>(Src.getImm()) &&
     206          20 :     !TII->isInlineConstant(*Src.getParent(),
     207          36 :                            Src.getParent()->getOperandNo(&Src));
     208             : }
     209             : 
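                     : /// \returns true if \p Src fits as either a signed or an unsigned 16-bit
                     : /// immediate (reported through \p IsUnsigned) and is not an inline constant.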
     210         325 : static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
     211             :                                  const MachineOperand &Src,
     212             :                                  bool &IsUnsigned) {
     213         650 :   if (isInt<16>(Src.getImm())) {
     214         309 :     IsUnsigned = false;
     215         309 :     return !TII->isInlineConstant(Src);
     216             :   }
     217             : 
     218          16 :   if (isUInt<16>(Src.getImm())) {
     219           8 :     IsUnsigned = true;
     220           8 :     return !TII->isInlineConstant(Src);
     221             :   }
     222             : 
     223             :   return false;
     224             : }
     225             : 
     226             : /// \returns true if the constant in \p Src should be replaced with a bitreverse
     227             : /// of an inline immediate.
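                     : ///
                     : /// For example (illustrative), 0x80000000 needs a 32-bit literal, but its
                     : /// bit-reverse is 1, an inline immediate, so
                     : ///   v_mov_b32 v0, 0x80000000
                     : /// can become
                     : ///   v_bfrev_b32 v0, 1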
     228       33687 : static bool isReverseInlineImm(const SIInstrInfo *TII,
     229             :                                const MachineOperand &Src,
     230             :                                int32_t &ReverseImm) {
     231       67374 :   if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
     232             :     return false;
     233             : 
     234       25764 :   ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
     235       12882 :   return ReverseImm >= -16 && ReverseImm <= 64;
     236             : }
     237             : 
     238             : /// Copy implicit register operands from specified instruction to this
     239             : /// instruction that are not part of the instruction definition.
     240       38627 : static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
     241             :                                  const MachineInstr &MI) {
     242       77254 :   for (unsigned i = MI.getDesc().getNumOperands() +
     243       38627 :          MI.getDesc().getNumImplicitUses() +
     244       77254 :          MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
     245       38627 :        i != e; ++i) {
     246           0 :     const MachineOperand &MO = MI.getOperand(i);
     247           0 :     if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
     248           0 :       NewMI.addOperand(MF, MO);
     249             :   }
     250       38627 : }
     251             : 
     252         501 : static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
     253             :   // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
     254             :   // get constants on the RHS.
     255        1002 :   if (!MI.getOperand(0).isReg())
     256          20 :     TII->commuteInstruction(MI, false, 0, 1);
     257             : 
     258         501 :   const MachineOperand &Src1 = MI.getOperand(1);
     259         501 :   if (!Src1.isImm())
     260             :     return;
     261             : 
     262         910 :   int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
     263         455 :   if (SOPKOpc == -1)
     264             :     return;
     265             : 
     266             :   // eq/ne is special because the imm16 can be treated as signed or unsigned,
     267             :   // and is initially selected to the unsigned version.
     268         439 :   if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
     269             :     bool HasUImm;
     270         325 :     if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
     271          34 :       if (!HasUImm) {
     272          26 :         SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
     273             :           AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
     274             :       }
     275             : 
     276          34 :       MI.setDesc(TII->get(SOPKOpc));
     277             :     }
     278             : 
     279             :     return;
     280             :   }
     281             : 
     282         114 :   const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
     283             : 
     284         326 :   if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
     285          78 :       (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
     286             :     MI.setDesc(NewDesc);
     287             :   }
     288             : }
     289             : 
     290       35592 : bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
     291       35592 :   if (skipFunction(MF.getFunction()))
     292             :     return false;
     293             : 
     294       35587 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     295       35587 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     296             :   const SIInstrInfo *TII = ST.getInstrInfo();
     297             :   const SIRegisterInfo &TRI = TII->getRegisterInfo();
     298             : 
     299             :   std::vector<unsigned> I1Defs;
     300             : 
     301             :   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
     302       75435 :                                                   BI != BE; ++BI) {
     303             : 
     304             :     MachineBasicBlock &MBB = *BI;
     305             :     MachineBasicBlock::iterator I, Next;
     306      759087 :     for (I = MBB.begin(); I != MBB.end(); I = Next) {
     307             :       Next = std::next(I);
     308             :       MachineInstr &MI = *I;
     309             : 
     310     1438478 :       if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
     311             :         // If this has a literal constant source that is the same as the
     312             :         // reversed bits of an inline immediate, replace with a bitreverse of
     313             :         // that constant. This saves 4 bytes in the common case of materializing
     314             :         // sign bits.
     315             : 
     316             :         // Test if we are after regalloc. We only want to do this after other
     317             :         // optimizations have run, because rewriting the mov would confuse them.
     318             :         // XXX - not exactly a check for a post-regalloc run.
     319       55626 :         MachineOperand &Src = MI.getOperand(1);
     320       72625 :         if (Src.isImm() &&
     321       16999 :             TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
     322             :           int32_t ReverseImm;
     323        9870 :           if (isReverseInlineImm(TII, Src, ReverseImm)) {
     324          85 :             MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
     325          85 :             Src.setImm(ReverseImm);
     326          85 :             continue;
     327             :           }
     328             :         }
     329             :       }
     330             : 
     331             :       // Combine adjacent s_nops into one, using the immediate operand to
     332             :       // encode how long to wait.
     333             :       //
     334             :       // s_nop N
     335             :       // s_nop M
     336             :       //  =>
     337             :       // s_nop (N + M)
     338      719793 :       if (MI.getOpcode() == AMDGPU::S_NOP &&
     339      719793 :           Next != MBB.end() &&
     340         639 :           (*Next).getOpcode() == AMDGPU::S_NOP) {
     341             : 
     342             :         MachineInstr &NextMI = *Next;
     343             :         // The instruction encodes the amount to wait with an offset of 1,
     344             :         // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
     345             :         // after adding.
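                     :         // For example (illustrative): s_nop 1 (2 cycles) followed by
                     :         // s_nop 2 (3 cycles) combines into s_nop 4 (5 cycles).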
     346         196 :         uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
     347         196 :         uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;
     348             : 
     349             :         // Make sure we don't overflow the bounds.
     350         196 :         if (Nop0 + Nop1 <= 8) {
     351         196 :           NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
     352         196 :           MI.eraseFromParent();
     353             :         }
     354             : 
     355         196 :         continue;
     356             :       }
     357             : 
     358             :       // FIXME: We also need to consider movs of constant operands since
     359             :       // immediate operands are not folded if they have more than one use, and
     360             :       // the operand folding pass is unaware if the immediate will be free since
     361             :       // it won't know if the src == dest constraint will end up being
     362             :       // satisfied.
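                     :       // When dst and src0 are the same register and the other operand fits
                     :       // in a simm16, the SOPK form saves 4 bytes, e.g. (illustrative):
                     :       //   s_add_i32 s1, s1, 0x1234  ->  s_addk_i32 s1, 0x1234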
     363     1434970 :       if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
     364             :           MI.getOpcode() == AMDGPU::S_MUL_I32) {
     365        3106 :         const MachineOperand *Dest = &MI.getOperand(0);
     366             :         MachineOperand *Src0 = &MI.getOperand(1);
     367             :         MachineOperand *Src1 = &MI.getOperand(2);
     368             : 
     369        3110 :         if (!Src0->isReg() && Src1->isReg()) {
     370           4 :           if (TII->commuteInstruction(MI, false, 1, 2))
     371             :             std::swap(Src0, Src1);
     372             :         }
     373             : 
     374             :         // FIXME: This could work better if hints worked with subregisters. If
     375             :         // we have a vector add of a constant, we usually don't get the correct
     376             :         // allocation due to the subregister usage.
     377        9256 :         if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
     378             :             Src0->isReg()) {
     379        1520 :           MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
     380        1520 :           MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
     381             :           continue;
     382             :         }
     383             : 
     384        3168 :         if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
     385         623 :           if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
     386         174 :             unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
     387             :               AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
     388             : 
     389          87 :             MI.setDesc(TII->get(Opc));
     390          87 :             MI.tieOperands(0, 1);
     391             :           }
     392             :         }
     393             :       }
     394             : 
     395             :       // Try to use s_cmpk_*
     396      726609 :       if (MI.isCompare() && TII->isSOPC(MI)) {
     397         501 :         shrinkScalarCompare(TII, MI);
     398         501 :         continue;
     399             :       }
     400             : 
     401             :       // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
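                     :       // e.g. (illustrative): s_mov_b32 s0, 0x1234 -> s_movk_i32 s0, 0x1234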
     402     1433874 :       if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
     403       66698 :         const MachineOperand &Dst = MI.getOperand(0);
     404             :         MachineOperand &Src = MI.getOperand(1);
     405             : 
     406      113259 :         if (Src.isImm() &&
     407       46561 :             TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) {
     408             :           int32_t ReverseImm;
     409       25186 :           if (isKImmOperand(TII, Src))
     410        1284 :             MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
     411       23902 :           else if (isReverseInlineImm(TII, Src, ReverseImm)) {
     412          89 :             MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
     413          89 :             Src.setImm(ReverseImm);
     414             :           }
     415             :         }
     416             : 
     417       66698 :         continue;
     418             :       }
     419             : 
     420      650239 :       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
     421      586273 :         continue;
     422             : 
     423       63966 :       if (!canShrink(MI, TII, TRI, MRI)) {
     424             :         // Try commuting the instruction and see if that enables us to shrink
     425             :         // it.
     426       37060 :         if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
     427       16203 :             !canShrink(MI, TII, TRI, MRI))
     428        8268 :           continue;
     429             :       }
     430             : 
     431             :       // getVOPe32 could be -1 here if we started with an instruction that had
     432             :       // a 32-bit encoding and then commuted it to an instruction that did not.
     433      111396 :       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
     434           0 :         continue;
     435             : 
     436      111396 :       int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
     437             : 
     438      111396 :       if (TII->isVOPC(Op32)) {
     439        5856 :         unsigned DstReg = MI.getOperand(0).getReg();
     440        8760 :         if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
     441             :           // VOPC instructions can only write to the VCC register. We can't
     442             :           // force them to use VCC here, because this is only one register and
     443             :           // cannot deal with sequences which would require multiple copies of
     444             :           // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
     445             :           //
     446             :           // So, instead of forcing the instruction to write to VCC, we provide
     447             :           // a hint to the register allocator to use VCC and then we will run
     448             :           // this pass again after RA and shrink it if it outputs to VCC.
     449             :           MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
     450        2904 :           continue;
     451             :         }
     452        2952 :         if (DstReg != AMDGPU::VCC)
     453         955 :           continue;
     454             :       }
     455             : 
     456       51839 :       if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
     457             :         // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
     458             :         // instructions.
     459        5778 :         const MachineOperand *Src2 =
     460             :             TII->getNamedOperand(MI, AMDGPU::OpName::src2);
     461        5778 :         if (!Src2->isReg())
     462           0 :           continue;
     463        5778 :         unsigned SReg = Src2->getReg();
     464        8645 :         if (TargetRegisterInfo::isVirtualRegister(SReg)) {
     465             :           MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
     466        2867 :           continue;
     467             :         }
     468        2911 :         if (SReg != AMDGPU::VCC)
     469         831 :           continue;
     470             :       }
     471             : 
     472             :       // Check for the bool flag output for instructions like V_ADD_I32_e64.
     473       48141 :       const MachineOperand *SDst = TII->getNamedOperand(MI,
     474             :                                                         AMDGPU::OpName::sdst);
     475             : 
     476             :       // Check the carry-in operand for v_addc_u32_e64.
     477       48141 :       const MachineOperand *Src2 = TII->getNamedOperand(MI,
     478             :                                                         AMDGPU::OpName::src2);
     479             : 
     480       48141 :       if (SDst) {
     481       29592 :         if (SDst->getReg() != AMDGPU::VCC) {
     482        9404 :           if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
     483             :             MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
     484        9404 :           continue;
     485             :         }
     486             : 
     487             :         // All of the instructions with carry outs also have an SGPR input in
     488             :         // src2.
     489       10894 :         if (Src2 && Src2->getReg() != AMDGPU::VCC) {
     490         110 :           if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
     491             :             MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
     492             : 
     493         110 :           continue;
     494             :         }
     495             :       }
     496             : 
     497             :       // We can shrink this instruction
     498             :       LLVM_DEBUG(dbgs() << "Shrinking " << MI);
     499             : 
     500             :       MachineInstrBuilder Inst32 =
     501       77254 :           BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
     502             : 
     503             :       // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
     504             :       // For VOPC instructions, this is replaced by an implicit def of vcc.
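                     :       // e.g. a shrunk compare like v_cmp_lt_f32_e32 defines vcc implicitly
                     :       // rather than taking an explicit $vdst operand.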
     505       38627 :       int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
     506       38627 :       if (Op32DstIdx != -1) {
     507             :         // dst
     508       36630 :         Inst32.add(MI.getOperand(0));
     509             :       } else {
     510             :         assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
     511             :                "Unexpected case");
     512             :       }
     513             : 
     515       38627 :       Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
     516             : 
     517       38627 :       const MachineOperand *Src1 =
     518             :           TII->getNamedOperand(MI, AMDGPU::OpName::src1);
     519       38627 :       if (Src1)
     520             :         Inst32.add(*Src1);
     521             : 
     522       38627 :       if (Src2) {
     523        7614 :         int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
     524        7614 :         if (Op32Src2Idx != -1) {
     525             :           Inst32.add(*Src2);
     526             :         } else {
     527             :           // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
     528             :           // replaced with an implicit read of vcc. This was already added
     529             :           // during the initial BuildMI, so find it to preserve the flags.
     530        6401 :           copyFlagsToImplicitVCC(*Inst32, *Src2);
     531             :         }
     532             :       }
     533             : 
     534             :       ++NumInstructionsShrunk;
     535             : 
     536             :       // Copy extra operands not present in the instruction definition.
     537       38627 :       copyExtraImplicitOps(*Inst32, MF, MI);
     538             : 
     539       38627 :       MI.eraseFromParent();
     540       38627 :       foldImmediates(*Inst32, TII, MRI);
     541             : 
     542             :       LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
     543             :     }
     544             :   }
     545             :   return false;
     546             : }

Generated by: LCOV version 1.13