LCOV - code coverage report
Current view: top level - lib/Target/ARM - ARMBaseInstrInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1409 1894 74.4 %
Date: 2018-10-20 13:21:21 Functions: 90 102 88.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains the Base ARM implementation of the TargetInstrInfo class.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "ARMBaseInstrInfo.h"
      15             : #include "ARMBaseRegisterInfo.h"
      16             : #include "ARMConstantPoolValue.h"
      17             : #include "ARMFeatures.h"
      18             : #include "ARMHazardRecognizer.h"
      19             : #include "ARMMachineFunctionInfo.h"
      20             : #include "ARMSubtarget.h"
      21             : #include "MCTargetDesc/ARMAddressingModes.h"
      22             : #include "MCTargetDesc/ARMBaseInfo.h"
      23             : #include "llvm/ADT/DenseMap.h"
      24             : #include "llvm/ADT/STLExtras.h"
      25             : #include "llvm/ADT/SmallSet.h"
      26             : #include "llvm/ADT/SmallVector.h"
      27             : #include "llvm/ADT/Triple.h"
      28             : #include "llvm/CodeGen/LiveVariables.h"
      29             : #include "llvm/CodeGen/MachineBasicBlock.h"
      30             : #include "llvm/CodeGen/MachineConstantPool.h"
      31             : #include "llvm/CodeGen/MachineFrameInfo.h"
      32             : #include "llvm/CodeGen/MachineFunction.h"
      33             : #include "llvm/CodeGen/MachineInstr.h"
      34             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      35             : #include "llvm/CodeGen/MachineMemOperand.h"
      36             : #include "llvm/CodeGen/MachineOperand.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
      39             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      40             : #include "llvm/CodeGen/TargetInstrInfo.h"
      41             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      42             : #include "llvm/CodeGen/TargetSchedule.h"
      43             : #include "llvm/IR/Attributes.h"
      44             : #include "llvm/IR/Constants.h"
      45             : #include "llvm/IR/DebugLoc.h"
      46             : #include "llvm/IR/Function.h"
      47             : #include "llvm/IR/GlobalValue.h"
      48             : #include "llvm/MC/MCAsmInfo.h"
      49             : #include "llvm/MC/MCInstrDesc.h"
      50             : #include "llvm/MC/MCInstrItineraries.h"
      51             : #include "llvm/Support/BranchProbability.h"
      52             : #include "llvm/Support/Casting.h"
      53             : #include "llvm/Support/CommandLine.h"
      54             : #include "llvm/Support/Compiler.h"
      55             : #include "llvm/Support/Debug.h"
      56             : #include "llvm/Support/ErrorHandling.h"
      57             : #include "llvm/Support/raw_ostream.h"
      58             : #include "llvm/Target/TargetMachine.h"
      59             : #include <algorithm>
      60             : #include <cassert>
      61             : #include <cstdint>
      62             : #include <iterator>
      63             : #include <new>
      64             : #include <utility>
      65             : #include <vector>
      66             : 
      67             : using namespace llvm;
      68             : 
      69             : #define DEBUG_TYPE "arm-instrinfo"
      70             : 
      71             : #define GET_INSTRINFO_CTOR_DTOR
      72             : #include "ARMGenInstrInfo.inc"
      73             : 
      74             : static cl::opt<bool>
      75             : EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
      76             :                cl::desc("Enable ARM 2-addr to 3-addr conv"));
      77             : 
      78             : /// ARM_MLxEntry - Record information about MLA / MLS instructions.
      79             : struct ARM_MLxEntry {
      80             :   uint16_t MLxOpc;     // MLA / MLS opcode
      81             :   uint16_t MulOpc;     // Expanded multiplication opcode
      82             :   uint16_t AddSubOpc;  // Expanded add / sub opcode
      83             :   bool NegAcc;         // True if the acc is negated before the add / sub.
      84             :   bool HasLane;        // True if instruction has an extra "lane" operand.
      85             : };
      86             : 
      87             : static const ARM_MLxEntry ARM_MLxTable[] = {
      88             :   // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
      89             :   // fp scalar ops
      90             :   { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
      91             :   { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
      92             :   { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
      93             :   { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
      94             :   { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
      95             :   { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
      96             :   { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
      97             :   { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },
      98             : 
      99             :   // fp SIMD ops
     100             :   { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
     101             :   { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
     102             :   { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
     103             :   { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
     104             :   { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
     105             :   { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
     106             :   { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
     107             :   { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
     108             : };
     109             : 
     110        5050 : ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
     111             :   : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
     112        5050 :     Subtarget(STI) {
     113       85850 :   for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
     114       80800 :     if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
     115           0 :       llvm_unreachable("Duplicated entries?");
     116       80800 :     MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
     117       80800 :     MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
     118             :   }
     119        5050 : }
     120             : 
     121             : // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
     122             : // currently defaults to no prepass hazard recognizer.
     123             : ScheduleHazardRecognizer *
     124       11322 : ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
     125             :                                                const ScheduleDAG *DAG) const {
     126       11322 :   if (usePreRAHazardRecognizer()) {
     127             :     const InstrItineraryData *II =
     128       11322 :         static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
     129       11322 :     return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
     130             :   }
     131           0 :   return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
     132             : }
     133             : 
     134       11263 : ScheduleHazardRecognizer *ARMBaseInstrInfo::
     135             : CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
     136             :                                    const ScheduleDAG *DAG) const {
     137       11263 :   if (Subtarget.isThumb2() || Subtarget.hasVFP2())
     138       19190 :     return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
     139        1668 :   return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
     140             : }
     141             : 
     142           0 : MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
     143             :     MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
     144             :   // FIXME: Thumb2 support.
     145             : 
     146           0 :   if (!EnableARM3Addr)
     147             :     return nullptr;
     148             : 
     149           0 :   MachineFunction &MF = *MI.getParent()->getParent();
     150           0 :   uint64_t TSFlags = MI.getDesc().TSFlags;
     151             :   bool isPre = false;
     152           0 :   switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
     153             :   default: return nullptr;
     154           0 :   case ARMII::IndexModePre:
     155             :     isPre = true;
     156           0 :     break;
     157             :   case ARMII::IndexModePost:
     158             :     break;
     159             :   }
     160             : 
     161             :   // Try splitting an indexed load/store to an un-indexed one plus an add/sub
     162             :   // operation.
     163           0 :   unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
     164           0 :   if (MemOpc == 0)
     165             :     return nullptr;
     166             : 
     167           0 :   MachineInstr *UpdateMI = nullptr;
     168           0 :   MachineInstr *MemMI = nullptr;
     169           0 :   unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
     170           0 :   const MCInstrDesc &MCID = MI.getDesc();
     171           0 :   unsigned NumOps = MCID.getNumOperands();
     172           0 :   bool isLoad = !MI.mayStore();
     173           0 :   const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
     174           0 :   const MachineOperand &Base = MI.getOperand(2);
     175           0 :   const MachineOperand &Offset = MI.getOperand(NumOps - 3);
     176           0 :   unsigned WBReg = WB.getReg();
     177           0 :   unsigned BaseReg = Base.getReg();
     178           0 :   unsigned OffReg = Offset.getReg();
     179           0 :   unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
     180           0 :   ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
     181           0 :   switch (AddrMode) {
     182           0 :   default: llvm_unreachable("Unknown indexed op!");
     183             :   case ARMII::AddrMode2: {
     184             :     bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
     185             :     unsigned Amt = ARM_AM::getAM2Offset(OffImm);
     186           0 :     if (OffReg == 0) {
     187           0 :       if (ARM_AM::getSOImmVal(Amt) == -1)
     188             :         // Can't encode it in a so_imm operand. This transformation will
     189             :         // add more than 1 instruction. Abandon!
     190             :         return nullptr;
     191           0 :       UpdateMI = BuildMI(MF, MI.getDebugLoc(),
     192           0 :                          get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
     193           0 :                      .addReg(BaseReg)
     194           0 :                      .addImm(Amt)
     195           0 :                      .add(predOps(Pred))
     196           0 :                      .add(condCodeOp());
     197           0 :     } else if (Amt != 0) {
     198             :       ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
     199             :       unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
     200           0 :       UpdateMI = BuildMI(MF, MI.getDebugLoc(),
     201           0 :                          get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
     202           0 :                      .addReg(BaseReg)
     203           0 :                      .addReg(OffReg)
     204           0 :                      .addReg(0)
     205           0 :                      .addImm(SOOpc)
     206           0 :                      .add(predOps(Pred))
     207           0 :                      .add(condCodeOp());
     208             :     } else
     209           0 :       UpdateMI = BuildMI(MF, MI.getDebugLoc(),
     210           0 :                          get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
     211           0 :                      .addReg(BaseReg)
     212           0 :                      .addReg(OffReg)
     213           0 :                      .add(predOps(Pred))
     214           0 :                      .add(condCodeOp());
     215             :     break;
     216             :   }
     217             :   case ARMII::AddrMode3 : {
     218             :     bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
     219             :     unsigned Amt = ARM_AM::getAM3Offset(OffImm);
     220           0 :     if (OffReg == 0)
     221             :       // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
     222           0 :       UpdateMI = BuildMI(MF, MI.getDebugLoc(),
     223           0 :                          get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
     224           0 :                      .addReg(BaseReg)
     225             :                      .addImm(Amt)
     226           0 :                      .add(predOps(Pred))
     227           0 :                      .add(condCodeOp());
     228             :     else
     229           0 :       UpdateMI = BuildMI(MF, MI.getDebugLoc(),
     230           0 :                          get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
     231           0 :                      .addReg(BaseReg)
     232           0 :                      .addReg(OffReg)
     233           0 :                      .add(predOps(Pred))
     234           0 :                      .add(condCodeOp());
     235             :     break;
     236             :   }
     237             :   }
     238             : 
     239             :   std::vector<MachineInstr*> NewMIs;
     240           0 :   if (isPre) {
     241           0 :     if (isLoad)
     242           0 :       MemMI =
     243           0 :           BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
     244           0 :               .addReg(WBReg)
     245             :               .addImm(0)
     246           0 :               .addImm(Pred);
     247             :     else
     248           0 :       MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
     249           0 :                   .addReg(MI.getOperand(1).getReg())
     250           0 :                   .addReg(WBReg)
     251           0 :                   .addReg(0)
     252             :                   .addImm(0)
     253           0 :                   .addImm(Pred);
     254           0 :     NewMIs.push_back(MemMI);
     255           0 :     NewMIs.push_back(UpdateMI);
     256             :   } else {
     257           0 :     if (isLoad)
     258           0 :       MemMI =
     259           0 :           BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
     260           0 :               .addReg(BaseReg)
     261             :               .addImm(0)
     262           0 :               .addImm(Pred);
     263             :     else
     264           0 :       MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
     265           0 :                   .addReg(MI.getOperand(1).getReg())
     266           0 :                   .addReg(BaseReg)
     267           0 :                   .addReg(0)
     268             :                   .addImm(0)
     269           0 :                   .addImm(Pred);
     270           0 :     if (WB.isDead())
     271           0 :       UpdateMI->getOperand(0).setIsDead();
     272           0 :     NewMIs.push_back(UpdateMI);
     273           0 :     NewMIs.push_back(MemMI);
     274             :   }
     275             : 
     276             :   // Transfer LiveVariables states, kill / dead info.
     277           0 :   if (LV) {
     278           0 :     for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     279           0 :       MachineOperand &MO = MI.getOperand(i);
     280           0 :       if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
     281             :         unsigned Reg = MO.getReg();
     282             : 
     283           0 :         LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
     284           0 :         if (MO.isDef()) {
     285           0 :           MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
     286           0 :           if (MO.isDead())
     287           0 :             LV->addVirtualRegisterDead(Reg, *NewMI);
     288             :         }
     289           0 :         if (MO.isUse() && MO.isKill()) {
     290           0 :           for (unsigned j = 0; j < 2; ++j) {
     291             :             // Look at the two new MI's in reverse order.
     292           0 :             MachineInstr *NewMI = NewMIs[j];
     293           0 :             if (!NewMI->readsRegister(Reg))
     294           0 :               continue;
     295           0 :             LV->addVirtualRegisterKilled(Reg, *NewMI);
     296           0 :             if (VI.removeKill(MI))
     297           0 :               VI.Kills.push_back(NewMI);
     298           0 :             break;
     299             :           }
     300             :         }
     301             :       }
     302             :     }
     303             :   }
     304             : 
     305           0 :   MachineBasicBlock::iterator MBBI = MI.getIterator();
     306           0 :   MFI->insert(MBBI, NewMIs[1]);
     307           0 :   MFI->insert(MBBI, NewMIs[0]);
     308           0 :   return NewMIs[0];
     309             : }
     310             : 
     311             : // Branch analysis.
     312      298728 : bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     313             :                                      MachineBasicBlock *&TBB,
     314             :                                      MachineBasicBlock *&FBB,
     315             :                                      SmallVectorImpl<MachineOperand> &Cond,
     316             :                                      bool AllowModify) const {
     317      298728 :   TBB = nullptr;
     318      298728 :   FBB = nullptr;
     319             : 
     320      298728 :   MachineBasicBlock::iterator I = MBB.end();
     321      298728 :   if (I == MBB.begin())
     322             :     return false; // Empty blocks are easy.
     323             :   --I;
     324             : 
     325             :   // Walk backwards from the end of the basic block until the branch is
     326             :   // analyzed or we give up.
     327     1131533 :   while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
     328             :     // Flag to be raised on unanalyzeable instructions. This is useful in cases
     329             :     // where we want to clean up on the end of the basic block before we bail
     330             :     // out.
     331             :     bool CantAnalyze = false;
     332             : 
     333             :     // Skip over DEBUG values and predicated nonterminators.
     334      267493 :     while (I->isDebugInstr() || !I->isTerminator()) {
     335       13853 :       if (I == MBB.begin())
     336             :         return false;
     337             :       --I;
     338             :     }
     339             : 
     340      507794 :     if (isIndirectBranchOpcode(I->getOpcode()) ||
     341             :         isJumpTableBranchOpcode(I->getOpcode())) {
     342             :       // Indirect branches and jump tables can't be analyzed, but we still want
     343             :       // to clean up any instructions at the tail of the basic block.
     344             :       CantAnalyze = true;
     345             :     } else if (isUncondBranchOpcode(I->getOpcode())) {
     346       32079 :       TBB = I->getOperand(0).getMBB();
     347             :     } else if (isCondBranchOpcode(I->getOpcode())) {
     348             :       // Bail out if we encounter multiple conditional branches.
     349       77707 :       if (!Cond.empty())
     350             :         return true;
     351             : 
     352             :       assert(!FBB && "FBB should have been null.");
     353       77661 :       FBB = TBB;
     354       77661 :       TBB = I->getOperand(0).getMBB();
     355      155322 :       Cond.push_back(I->getOperand(1));
     356      155322 :       Cond.push_back(I->getOperand(2));
     357      139813 :     } else if (I->isReturn()) {
     358             :       // Returns can't be analyzed, but we should run cleanup.
     359      268052 :       CantAnalyze = !isPredicated(*I);
     360             :     } else {
     361             :       // We encountered other unrecognized terminator. Bail out immediately.
     362             :       return true;
     363             :     }
     364             : 
     365             :     // Cleanup code - to be run for unpredicated unconditional branches and
     366             :     //                returns.
     367      496128 :     if (!isPredicated(*I) &&
     368      337804 :           (isUncondBranchOpcode(I->getOpcode()) ||
     369             :            isIndirectBranchOpcode(I->getOpcode()) ||
     370      132566 :            isJumpTableBranchOpcode(I->getOpcode()) ||
     371             :            I->isReturn())) {
     372             :       // Forget any previous condition branch information - it no longer applies.
     373             :       Cond.clear();
     374      168902 :       FBB = nullptr;
     375             : 
     376             :       // If we can modify the function, delete everything below this
     377             :       // unconditional branch.
     378      168902 :       if (AllowModify) {
     379       66888 :         MachineBasicBlock::iterator DI = std::next(I);
     380       66894 :         while (DI != MBB.end()) {
     381             :           MachineInstr &InstToDelete = *DI;
     382             :           ++DI;
     383           6 :           InstToDelete.eraseFromParent();
     384             :         }
     385             :       }
     386             :     }
     387             : 
     388      248064 :     if (CantAnalyze)
     389             :       return true;
     390             : 
     391      111200 :     if (I == MBB.begin())
     392             :       return false;
     393             : 
     394             :     --I;
     395             :   }
     396             : 
     397             :   // We made it past the terminators without bailing out - we must have
     398             :   // analyzed this branch successfully.
     399             :   return false;
     400             : }
     401             : 
     402        9708 : unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
     403             :                                         int *BytesRemoved) const {
     404             :   assert(!BytesRemoved && "code size not handled");
     405             : 
     406        9708 :   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     407        9708 :   if (I == MBB.end())
     408             :     return 0;
     409             : 
     410       19412 :   if (!isUncondBranchOpcode(I->getOpcode()) &&
     411             :       !isCondBranchOpcode(I->getOpcode()))
     412             :     return 0;
     413             : 
     414             :   // Remove the branch.
     415        9302 :   I->eraseFromParent();
     416             : 
     417        9302 :   I = MBB.end();
     418             : 
     419        9302 :   if (I == MBB.begin()) return 1;
     420             :   --I;
     421       17768 :   if (!isCondBranchOpcode(I->getOpcode()))
     422             :     return 1;
     423             : 
     424             :   // Remove the branch.
     425        2289 :   I->eraseFromParent();
     426        2289 :   return 2;
     427             : }
     428             : 
     429        8885 : unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
     430             :                                         MachineBasicBlock *TBB,
     431             :                                         MachineBasicBlock *FBB,
     432             :                                         ArrayRef<MachineOperand> Cond,
     433             :                                         const DebugLoc &DL,
     434             :                                         int *BytesAdded) const {
     435             :   assert(!BytesAdded && "code size not handled");
     436        8885 :   ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
     437        8885 :   int BOpc   = !AFI->isThumbFunction()
     438        8885 :     ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
     439             :   int BccOpc = !AFI->isThumbFunction()
     440        8885 :     ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
     441        8885 :   bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
     442             : 
     443             :   // Shouldn't be a fall through.
     444             :   assert(TBB && "insertBranch must not be told to insert a fallthrough");
     445             :   assert((Cond.size() == 2 || Cond.size() == 0) &&
     446             :          "ARM branch conditions have two components!");
     447             : 
     448             :   // For conditional branches, we use addOperand to preserve CPSR flags.
     449             : 
     450        8885 :   if (!FBB) {
     451        8709 :     if (Cond.empty()) { // Unconditional branch?
     452        2073 :       if (isThumb)
     453        2784 :         BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
     454             :       else
     455         681 :         BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
     456             :     } else
     457        6636 :       BuildMI(&MBB, DL, get(BccOpc))
     458             :           .addMBB(TBB)
     459        6636 :           .addImm(Cond[0].getImm())
     460             :           .add(Cond[1]);
     461        8709 :     return 1;
     462             :   }
     463             : 
     464             :   // Two-way conditional branch.
     465         176 :   BuildMI(&MBB, DL, get(BccOpc))
     466             :       .addMBB(TBB)
     467         176 :       .addImm(Cond[0].getImm())
     468             :       .add(Cond[1]);
     469         176 :   if (isThumb)
     470         212 :     BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
     471             :   else
     472          70 :     BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
     473             :   return 2;
     474             : }
     475             : 
     476        9160 : bool ARMBaseInstrInfo::
     477             : reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
     478        9160 :   ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
     479        9160 :   Cond[0].setImm(ARMCC::getOppositeCondition(CC));
     480        9160 :   return false;
     481             : }
     482             : 
     483      978436 : bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
     484      978436 :   if (MI.isBundle()) {
     485         899 :     MachineBasicBlock::const_instr_iterator I = MI.getIterator();
     486         899 :     MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
     487        1836 :     while (++I != E && I->isInsideBundle()) {
     488        1798 :       int PIdx = I->findFirstPredOperandIdx();
     489        1798 :       if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
     490             :         return true;
     491             :     }
     492             :     return false;
     493             :   }
     494             : 
     495      977537 :   int PIdx = MI.findFirstPredOperandIdx();
     496      977537 :   return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
     497             : }
     498             : 
     499         797 : bool ARMBaseInstrInfo::PredicateInstruction(
     500             :     MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
     501         797 :   unsigned Opc = MI.getOpcode();
     502         797 :   if (isUncondBranchOpcode(Opc)) {
     503           0 :     MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
     504           0 :     MachineInstrBuilder(*MI.getParent()->getParent(), MI)
     505           0 :       .addImm(Pred[0].getImm())
     506           0 :       .addReg(Pred[1].getReg());
     507           0 :     return true;
     508             :   }
     509             : 
     510         797 :   int PIdx = MI.findFirstPredOperandIdx();
     511         797 :   if (PIdx != -1) {
     512         797 :     MachineOperand &PMO = MI.getOperand(PIdx);
     513         797 :     PMO.setImm(Pred[0].getImm());
     514        1594 :     MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
     515         797 :     return true;
     516             :   }
     517             :   return false;
     518             : }
     519             : 
     520         110 : bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
     521             :                                          ArrayRef<MachineOperand> Pred2) const {
     522         110 :   if (Pred1.size() > 2 || Pred2.size() > 2)
     523             :     return false;
     524             : 
     525         110 :   ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
     526         110 :   ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
     527         110 :   if (CC1 == CC2)
     528             :     return true;
     529             : 
     530          48 :   switch (CC1) {
     531             :   default:
     532             :     return false;
     533           0 :   case ARMCC::AL:
     534           0 :     return true;
     535           5 :   case ARMCC::HS:
     536           5 :     return CC2 == ARMCC::HI;
     537           6 :   case ARMCC::LS:
     538           6 :     return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
     539           3 :   case ARMCC::GE:
     540           3 :     return CC2 == ARMCC::GT;
     541           2 :   case ARMCC::LE:
     542           2 :     return CC2 == ARMCC::LT;
     543             :   }
     544             : }
     545             : 
     546       41989 : bool ARMBaseInstrInfo::DefinesPredicate(
     547             :     MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
     548             :   bool Found = false;
     549      237151 :   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     550      195162 :     const MachineOperand &MO = MI.getOperand(i);
     551      195162 :     if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
     552      133251 :         (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
     553        3273 :       Pred.push_back(MO);
     554             :       Found = true;
     555             :     }
     556             :   }
     557             : 
     558       41989 :   return Found;
     559             : }
     560             : 
     561          60 : bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
     562         381 :   for (const auto &MO : MI.operands())
     563         328 :     if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
     564             :       return true;
     565             :   return false;
     566             : }
     567             : 
     568           0 : bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
     569             :                                         unsigned Op) const {
     570           0 :   const MachineOperand &Offset = MI.getOperand(Op + 1);
     571           0 :   return Offset.getReg() != 0;
     572             : }
     573             : 
     574             : // Load with negative register offset requires additional 1cyc and +I unit
     575             : // for Cortex A57
     576           0 : bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
     577             :                                              unsigned Op) const {
     578           0 :   const MachineOperand &Offset = MI.getOperand(Op + 1);
     579           0 :   const MachineOperand &Opc = MI.getOperand(Op + 2);
     580             :   assert(Opc.isImm());
     581             :   assert(Offset.isReg());
     582           0 :   int64_t OpcImm = Opc.getImm();
     583             : 
     584           0 :   bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
     585           0 :   return (isSub && Offset.getReg() != 0);
     586             : }
     587             : 
     588           0 : bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
     589             :                                        unsigned Op) const {
     590           0 :   const MachineOperand &Opc = MI.getOperand(Op + 2);
     591           0 :   unsigned OffImm = Opc.getImm();
     592           0 :   return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
     593             : }
     594             : 
     595             : // Load, scaled register offset, not plus LSL2
     596           3 : bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
     597             :                                                   unsigned Op) const {
     598           3 :   const MachineOperand &Opc = MI.getOperand(Op + 2);
     599           3 :   unsigned OffImm = Opc.getImm();
     600             : 
     601           3 :   bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
     602             :   unsigned Amt = ARM_AM::getAM2Offset(OffImm);
     603             :   ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
     604           3 :   if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
     605           3 :   bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
     606           3 :   return !SimpleScaled;
     607             : }
     608             : 
     609             : // Minus reg for ldstso addr mode
     610           3 : bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
     611             :                                         unsigned Op) const {
     612           6 :   unsigned OffImm = MI.getOperand(Op + 2).getImm();
     613           3 :   return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
     614             : }
     615             : 
     616             : // Load, scaled register offset
     617           0 : bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
     618             :                                       unsigned Op) const {
     619           0 :   unsigned OffImm = MI.getOperand(Op + 2).getImm();
     620           0 :   return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
     621             : }
     622             : 
     623       37013 : static bool isEligibleForITBlock(const MachineInstr *MI) {
     624       74026 :   switch (MI->getOpcode()) {
     625             :   default: return true;
     626          60 :   case ARM::tADC:   // ADC (register) T1
     627             :   case ARM::tADDi3: // ADD (immediate) T1
     628             :   case ARM::tADDi8: // ADD (immediate) T2
     629             :   case ARM::tADDrr: // ADD (register) T1
     630             :   case ARM::tAND:   // AND (register) T1
     631             :   case ARM::tASRri: // ASR (immediate) T1
     632             :   case ARM::tASRrr: // ASR (register) T1
     633             :   case ARM::tBIC:   // BIC (register) T1
     634             :   case ARM::tEOR:   // EOR (register) T1
     635             :   case ARM::tLSLri: // LSL (immediate) T1
     636             :   case ARM::tLSLrr: // LSL (register) T1
     637             :   case ARM::tLSRri: // LSR (immediate) T1
     638             :   case ARM::tLSRrr: // LSR (register) T1
     639             :   case ARM::tMUL:   // MUL T1
     640             :   case ARM::tMVN:   // MVN (register) T1
     641             :   case ARM::tORR:   // ORR (register) T1
     642             :   case ARM::tROR:   // ROR (register) T1
     643             :   case ARM::tRSB:   // RSB (immediate) T1
     644             :   case ARM::tSBC:   // SBC (register) T1
     645             :   case ARM::tSUBi3: // SUB (immediate) T1
     646             :   case ARM::tSUBi8: // SUB (immediate) T2
     647             :   case ARM::tSUBrr: // SUB (register) T1
     648          60 :     return !ARMBaseInstrInfo::isCPSRDefined(*MI);
     649             :   }
     650             : }
     651             : 
     652             : /// isPredicable - Return true if the specified instruction can be predicated.
     653             : /// By default, this returns true for every instruction with a
     654             : /// PredicateOperand.
     655       41972 : bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
     656       41972 :   if (!MI.isPredicable())
     657             :     return false;
     658             : 
     659       37049 :   if (MI.isBundle())
     660             :     return false;
     661             : 
     662       37013 :   if (!isEligibleForITBlock(&MI))
     663             :     return false;
     664             : 
     665             :   const ARMFunctionInfo *AFI =
     666       37006 :       MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
     667             : 
     668             :   // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
     669             :   // In their ARM encoding, they can't be encoded in a conditional form.
     670       37006 :   if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
     671             :     return false;
     672             : 
     673       34698 :   if (AFI->isThumb2Function()) {
     674       14564 :     if (getSubtarget().restrictIT())
     675         827 :       return isV8EligibleForIT(&MI);
     676             :   }
     677             : 
     678             :   return true;
     679             : }
     680             : 
     681             : namespace llvm {
     682             : 
     683          94 : template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
     684         613 :   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     685         519 :     const MachineOperand &MO = MI->getOperand(i);
     686         519 :     if (!MO.isReg() || MO.isUndef() || MO.isUse())
     687             :       continue;
     688         188 :     if (MO.getReg() != ARM::CPSR)
     689             :       continue;
     690          94 :     if (!MO.isDead())
     691             :       return false;
     692             :   }
     693             :   // all definitions of CPSR are dead
     694             :   return true;
     695             : }
     696             : 
     697             : } // end namespace llvm
     698             : 
     699             : /// GetInstSize - Return the size of the specified MachineInstr.
     700             : ///
     701      890858 : unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     702      890858 :   const MachineBasicBlock &MBB = *MI.getParent();
     703      890858 :   const MachineFunction *MF = MBB.getParent();
     704      890858 :   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
     705             : 
     706      890858 :   const MCInstrDesc &MCID = MI.getDesc();
     707     1781716 :   if (MCID.getSize())
     708             :     return MCID.getSize();
     709             : 
     710             :   // If this machine instr is an inline asm, measure it.
     711      307218 :   if (MI.getOpcode() == ARM::INLINEASM) {
     712      172372 :     unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
     713      172372 :     if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
     714         530 :       Size = alignTo(Size, 4);
     715      172372 :     return Size;
     716             :   }
     717             :   unsigned Opc = MI.getOpcode();
     718      134846 :   switch (Opc) {
     719             :   default:
     720             :     // pseudo-instruction sizes are zero.
     721             :     return 0;
     722           0 :   case TargetOpcode::BUNDLE:
     723           0 :     return getInstBundleLength(MI);
     724        2038 :   case ARM::MOVi16_ga_pcrel:
     725             :   case ARM::MOVTi16_ga_pcrel:
     726             :   case ARM::t2MOVi16_ga_pcrel:
     727             :   case ARM::t2MOVTi16_ga_pcrel:
     728        2038 :     return 4;
     729          13 :   case ARM::MOVi32imm:
     730             :   case ARM::t2MOVi32imm:
     731          13 :     return 8;
     732       23100 :   case ARM::CONSTPOOL_ENTRY:
     733             :   case ARM::JUMPTABLE_INSTS:
     734             :   case ARM::JUMPTABLE_ADDRS:
     735             :   case ARM::JUMPTABLE_TBB:
     736             :   case ARM::JUMPTABLE_TBH:
     737             :     // If this machine instr is a constant pool entry, its size is recorded as
     738             :     // operand #2.
     739       23100 :     return MI.getOperand(2).getImm();
     740           4 :   case ARM::Int_eh_sjlj_longjmp:
     741           4 :     return 16;
     742           2 :   case ARM::tInt_eh_sjlj_longjmp:
     743           2 :     return 10;
     744           3 :   case ARM::tInt_WIN_eh_sjlj_longjmp:
     745           3 :     return 12;
     746           7 :   case ARM::Int_eh_sjlj_setjmp:
     747             :   case ARM::Int_eh_sjlj_setjmp_nofp:
     748           7 :     return 20;
     749          13 :   case ARM::tInt_eh_sjlj_setjmp:
     750             :   case ARM::t2Int_eh_sjlj_setjmp:
     751             :   case ARM::t2Int_eh_sjlj_setjmp_nofp:
     752          13 :     return 12;
     753          41 :   case ARM::SPACE:
     754          41 :     return MI.getOperand(1).getImm();
     755             :   }
     756             : }
     757             : 
     758           0 : unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
     759             :   unsigned Size = 0;
     760           0 :   MachineBasicBlock::const_instr_iterator I = MI.getIterator();
     761           0 :   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
     762           0 :   while (++I != E && I->isInsideBundle()) {
     763             :     assert(!I->isBundle() && "No nested bundle!");
     764           0 :     Size += getInstSizeInBytes(*I);
     765             :   }
     766           0 :   return Size;
     767             : }
     768             : 
     769           4 : void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
     770             :                                     MachineBasicBlock::iterator I,
     771             :                                     unsigned DestReg, bool KillSrc,
     772             :                                     const ARMSubtarget &Subtarget) const {
     773           4 :   unsigned Opc = Subtarget.isThumb()
     774           4 :                      ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
     775             :                      : ARM::MRS;
     776             : 
     777             :   MachineInstrBuilder MIB =
     778           8 :       BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
     779             : 
     780             :   // There is only 1 A/R class MRS instruction, and it always refers to
     781             :   // APSR. However, there are lots of other possibilities on M-class cores.
     782           4 :   if (Subtarget.isMClass())
     783             :     MIB.addImm(0x800);
     784             : 
     785           4 :   MIB.add(predOps(ARMCC::AL))
     786           4 :      .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
     787           4 : }
     788             : 
     789           4 : void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
     790             :                                   MachineBasicBlock::iterator I,
     791             :                                   unsigned SrcReg, bool KillSrc,
     792             :                                   const ARMSubtarget &Subtarget) const {
     793           4 :   unsigned Opc = Subtarget.isThumb()
     794           4 :                      ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
     795             :                      : ARM::MSR;
     796             : 
     797           8 :   MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
     798             : 
     799           4 :   if (Subtarget.isMClass())
     800             :     MIB.addImm(0x800);
     801             :   else
     802             :     MIB.addImm(8);
     803             : 
     804           4 :   MIB.addReg(SrcReg, getKillRegState(KillSrc))
     805           4 :      .add(predOps(ARMCC::AL))
     806           4 :      .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
     807           4 : }
     808             : 
     809        8350 : void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     810             :                                    MachineBasicBlock::iterator I,
     811             :                                    const DebugLoc &DL, unsigned DestReg,
     812             :                                    unsigned SrcReg, bool KillSrc) const {
     813        8350 :   bool GPRDest = ARM::GPRRegClass.contains(DestReg);
     814             :   bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
     815             : 
     816        8350 :   if (GPRDest && GPRSrc) {
     817        9616 :     BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
     818        4808 :         .addReg(SrcReg, getKillRegState(KillSrc))
     819        4808 :         .add(predOps(ARMCC::AL))
     820        4808 :         .add(condCodeOp());
     821        8333 :     return;
     822             :   }
     823             : 
     824        3542 :   bool SPRDest = ARM::SPRRegClass.contains(DestReg);
     825             :   bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
     826             : 
     827             :   unsigned Opc = 0;
     828        3542 :   if (SPRDest && SPRSrc)
     829             :     Opc = ARM::VMOVS;
     830        2192 :   else if (GPRDest && SPRSrc)
     831             :     Opc = ARM::VMOVRS;
     832        1875 :   else if (SPRDest && GPRSrc)
     833             :     Opc = ARM::VMOVSR;
     834        3068 :   else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
     835             :     Opc = ARM::VMOVD;
     836         844 :   else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
     837             :     Opc = ARM::VORRq;
     838             : 
     839             :   if (Opc) {
     840        7034 :     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
     841        3517 :     MIB.addReg(SrcReg, getKillRegState(KillSrc));
     842        3517 :     if (Opc == ARM::VORRq)
     843         397 :       MIB.addReg(SrcReg, getKillRegState(KillSrc));
     844        3517 :     MIB.add(predOps(ARMCC::AL));
     845             :     return;
     846             :   }
     847             : 
     848             :   // Handle register classes that require multiple instructions.
     849             :   unsigned BeginIdx = 0;
     850             :   unsigned SubRegs = 0;
     851             :   int Spacing = 1;
     852             : 
     853             :   // Use VORRq when possible.
     854          50 :   if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
     855             :     Opc = ARM::VORRq;
     856             :     BeginIdx = ARM::qsub_0;
     857             :     SubRegs = 2;
     858          50 :   } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
     859             :     Opc = ARM::VORRq;
     860             :     BeginIdx = ARM::qsub_0;
     861             :     SubRegs = 4;
     862             :   // Fall back to VMOVD.
     863          40 :   } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
     864             :     Opc = ARM::VMOVD;
     865             :     BeginIdx = ARM::dsub_0;
     866             :     SubRegs = 2;
     867          32 :   } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
     868             :     Opc = ARM::VMOVD;
     869             :     BeginIdx = ARM::dsub_0;
     870             :     SubRegs = 3;
     871          32 :   } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
     872             :     Opc = ARM::VMOVD;
     873             :     BeginIdx = ARM::dsub_0;
     874             :     SubRegs = 4;
     875          32 :   } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
     876           2 :     Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
     877             :     BeginIdx = ARM::gsub_0;
     878             :     SubRegs = 2;
     879          28 :   } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
     880             :     Opc = ARM::VMOVD;
     881             :     BeginIdx = ARM::dsub_0;
     882             :     SubRegs = 2;
     883             :     Spacing = 2;
     884          28 :   } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
     885             :     Opc = ARM::VMOVD;
     886             :     BeginIdx = ARM::dsub_0;
     887             :     SubRegs = 3;
     888             :     Spacing = 2;
     889          28 :   } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
     890             :     Opc = ARM::VMOVD;
     891             :     BeginIdx = ARM::dsub_0;
     892             :     SubRegs = 4;
     893             :     Spacing = 2;
     894          14 :   } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
     895             :     Opc = ARM::VMOVS;
     896             :     BeginIdx = ARM::ssub_0;
     897             :     SubRegs = 2;
     898           8 :   } else if (SrcReg == ARM::CPSR) {
     899           4 :     copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
     900           4 :     return;
     901           4 :   } else if (DestReg == ARM::CPSR) {
     902           4 :     copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
     903           4 :     return;
     904             :   }
     905             : 
     906             :   assert(Opc && "Impossible reg-to-reg copy");
     907             : 
     908          17 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
     909          17 :   MachineInstrBuilder Mov;
     910             : 
     911             :   // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
     912          17 :   if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
     913           0 :     BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
     914           0 :     Spacing = -Spacing;
     915             :   }
     916             : #ifndef NDEBUG
     917             :   SmallSet<unsigned, 4> DstRegs;
     918             : #endif
     919          61 :   for (unsigned i = 0; i != SubRegs; ++i) {
     920          44 :     unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
     921          44 :     unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
     922             :     assert(Dst && Src && "Bad sub-register");
     923             : #ifndef NDEBUG
     924             :     assert(!DstRegs.count(Src) && "destructive vector copy");
     925             :     DstRegs.insert(Dst);
     926             : #endif
     927          88 :     Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
     928             :     // VORR takes two source operands.
     929          44 :     if (Opc == ARM::VORRq)
     930          20 :       Mov.addReg(Src);
     931          44 :     Mov = Mov.add(predOps(ARMCC::AL));
     932             :     // MOVr can set CC.
     933          44 :     if (Opc == ARM::MOVr)
     934           2 :       Mov = Mov.add(condCodeOp());
     935             :   }
     936             :   // Add implicit super-register defs and kills to the last instruction.
     937          17 :   Mov->addRegisterDefined(DestReg, TRI);
     938          17 :   if (KillSrc)
     939           1 :     Mov->addRegisterKilled(SrcReg, TRI);
     940             : }
     941             : 
     942         162 : bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
     943             :                                        const MachineOperand *&Src,
     944             :                                        const MachineOperand *&Dest) const {
     945             :   // VMOVRRD is also a copy instruction but it requires
     946             :   // special way of handling. It is more complex copy version
     947             :   // and since that we are not considering it. For recognition
     948             :   // of such instruction isExtractSubregLike MI interface fuction
     949             :   // could be used.
     950             :   // VORRq is considered as a move only if two inputs are
     951             :   // the same register.
     952         162 :   if (!MI.isMoveReg() ||
     953          22 :       (MI.getOpcode() == ARM::VORRq &&
     954           0 :        MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
     955             :     return false;
     956          22 :   Dest = &MI.getOperand(0);
     957          22 :   Src = &MI.getOperand(1);
     958          22 :   return true;
     959             : }
     960             : 
     961             : const MachineInstrBuilder &
     962          70 : ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
     963             :                           unsigned SubIdx, unsigned State,
     964             :                           const TargetRegisterInfo *TRI) const {
     965          70 :   if (!SubIdx)
     966           0 :     return MIB.addReg(Reg, State);
     967             : 
     968          70 :   if (TargetRegisterInfo::isPhysicalRegister(Reg))
     969           8 :     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
     970          62 :   return MIB.addReg(Reg, State, SubIdx);
     971             : }
     972             : 
     973        2357 : void ARMBaseInstrInfo::
     974             : storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     975             :                     unsigned SrcReg, bool isKill, int FI,
     976             :                     const TargetRegisterClass *RC,
     977             :                     const TargetRegisterInfo *TRI) const {
     978        2357 :   MachineFunction &MF = *MBB.getParent();
     979        2357 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     980             :   unsigned Align = MFI.getObjectAlignment(FI);
     981             : 
     982        2357 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
     983             :       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
     984             :       MFI.getObjectSize(FI), Align);
     985             : 
     986        2357 :   switch (TRI->getSpillSize(*RC)) {
     987           2 :     case 2:
     988           4 :       if (ARM::HPRRegClass.hasSubClassEq(RC)) {
     989           4 :         BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
     990           2 :             .addReg(SrcReg, getKillRegState(isKill))
     991             :             .addFrameIndex(FI)
     992             :             .addImm(0)
     993             :             .addMemOperand(MMO)
     994           2 :             .add(predOps(ARMCC::AL));
     995             :       } else
     996           0 :         llvm_unreachable("Unknown reg class!");
     997           2 :       break;
     998        1698 :     case 4:
     999        3396 :       if (ARM::GPRRegClass.hasSubClassEq(RC)) {
    1000        4626 :         BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
    1001        1542 :             .addReg(SrcReg, getKillRegState(isKill))
    1002             :             .addFrameIndex(FI)
    1003             :             .addImm(0)
    1004             :             .addMemOperand(MMO)
    1005        1542 :             .add(predOps(ARMCC::AL));
    1006         312 :       } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
    1007         468 :         BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
    1008         156 :             .addReg(SrcReg, getKillRegState(isKill))
    1009             :             .addFrameIndex(FI)
    1010             :             .addImm(0)
    1011             :             .addMemOperand(MMO)
    1012         156 :             .add(predOps(ARMCC::AL));
    1013             :       } else
    1014           0 :         llvm_unreachable("Unknown reg class!");
    1015             :       break;
    1016         258 :     case 8:
    1017         516 :       if (ARM::DPRRegClass.hasSubClassEq(RC)) {
    1018         753 :         BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
    1019         251 :             .addReg(SrcReg, getKillRegState(isKill))
    1020             :             .addFrameIndex(FI)
    1021             :             .addImm(0)
    1022             :             .addMemOperand(MMO)
    1023         251 :             .add(predOps(ARMCC::AL));
    1024          14 :       } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
    1025           7 :         if (Subtarget.hasV5TEOps()) {
    1026          15 :           MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
    1027           5 :           AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
    1028           5 :           AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
    1029           5 :           MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
    1030           5 :              .add(predOps(ARMCC::AL));
    1031             :         } else {
    1032             :           // Fallback to STM instruction, which has existed since the dawn of
    1033             :           // time.
    1034           6 :           MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
    1035             :                                         .addFrameIndex(FI)
    1036             :                                         .addMemOperand(MMO)
    1037           2 :                                         .add(predOps(ARMCC::AL));
    1038           2 :           AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
    1039           2 :           AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
    1040             :         }
    1041             :       } else
    1042           0 :         llvm_unreachable("Unknown reg class!");
    1043             :       break;
    1044         396 :     case 16:
    1045         792 :       if (ARM::DPairRegClass.hasSubClassEq(RC)) {
    1046             :         // Use aligned spills if the stack can be realigned.
    1047         396 :         if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1048        1167 :           BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
    1049             :               .addFrameIndex(FI)
    1050             :               .addImm(16)
    1051         389 :               .addReg(SrcReg, getKillRegState(isKill))
    1052             :               .addMemOperand(MMO)
    1053         389 :               .add(predOps(ARMCC::AL));
    1054             :         } else {
    1055          21 :           BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
    1056           7 :               .addReg(SrcReg, getKillRegState(isKill))
    1057             :               .addFrameIndex(FI)
    1058             :               .addMemOperand(MMO)
    1059           7 :               .add(predOps(ARMCC::AL));
    1060             :         }
    1061             :       } else
    1062           0 :         llvm_unreachable("Unknown reg class!");
    1063             :       break;
    1064           1 :     case 24:
    1065           2 :       if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
    1066             :         // Use aligned spills if the stack can be realigned.
    1067           1 :         if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1068           0 :           BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
    1069             :               .addFrameIndex(FI)
    1070             :               .addImm(16)
    1071           0 :               .addReg(SrcReg, getKillRegState(isKill))
    1072             :               .addMemOperand(MMO)
    1073           0 :               .add(predOps(ARMCC::AL));
    1074             :         } else {
    1075           1 :           MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
    1076           2 :                                             get(ARM::VSTMDIA))
    1077             :                                         .addFrameIndex(FI)
    1078           1 :                                         .add(predOps(ARMCC::AL))
    1079           1 :                                         .addMemOperand(MMO);
    1080           1 :           MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
    1081           1 :           MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
    1082           1 :           AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
    1083             :         }
    1084             :       } else
    1085           0 :         llvm_unreachable("Unknown reg class!");
    1086             :       break;
    1087           0 :     case 32:
    1088           0 :       if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
    1089           0 :         if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1090             :           // FIXME: It's possible to only store part of the QQ register if the
    1091             :           // spilled def has a sub-register index.
    1092           0 :           BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
    1093             :               .addFrameIndex(FI)
    1094             :               .addImm(16)
    1095           0 :               .addReg(SrcReg, getKillRegState(isKill))
    1096             :               .addMemOperand(MMO)
    1097           0 :               .add(predOps(ARMCC::AL));
    1098             :         } else {
    1099           0 :           MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
    1100           0 :                                             get(ARM::VSTMDIA))
    1101             :                                         .addFrameIndex(FI)
    1102           0 :                                         .add(predOps(ARMCC::AL))
    1103           0 :                                         .addMemOperand(MMO);
    1104           0 :           MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
    1105           0 :           MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
    1106           0 :           MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
    1107           0 :                 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    1108             :         }
    1109             :       } else
    1110           0 :         llvm_unreachable("Unknown reg class!");
    1111             :       break;
    1112           2 :     case 64:
    1113           4 :       if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
    1114           6 :         MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
    1115             :                                       .addFrameIndex(FI)
    1116           2 :                                       .add(predOps(ARMCC::AL))
    1117           2 :                                       .addMemOperand(MMO);
    1118           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
    1119           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
    1120           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
    1121           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    1122           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
    1123           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
    1124           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
    1125           2 :               AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    1126             :       } else
    1127           0 :         llvm_unreachable("Unknown reg class!");
    1128           2 :       break;
    1129           0 :     default:
    1130           0 :       llvm_unreachable("Unknown reg class!");
    1131             :   }
    1132        2357 : }
    1133             : 
    1134       13351 : unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
    1135             :                                               int &FrameIndex) const {
    1136       26702 :   switch (MI.getOpcode()) {
    1137             :   default: break;
    1138          23 :   case ARM::STRrs:
    1139             :   case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    1140          23 :     if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
    1141          23 :         MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
    1142           0 :         MI.getOperand(3).getImm() == 0) {
    1143           0 :       FrameIndex = MI.getOperand(1).getIndex();
    1144           0 :       return MI.getOperand(0).getReg();
    1145             :     }
    1146             :     break;
    1147        1117 :   case ARM::STRi12:
    1148             :   case ARM::t2STRi12:
    1149             :   case ARM::tSTRspi:
    1150             :   case ARM::VSTRD:
    1151             :   case ARM::VSTRS:
    1152        2234 :     if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
    1153         370 :         MI.getOperand(2).getImm() == 0) {
    1154         332 :       FrameIndex = MI.getOperand(1).getIndex();
    1155         332 :       return MI.getOperand(0).getReg();
    1156             :     }
    1157             :     break;
    1158          12 :   case ARM::VST1q64:
    1159             :   case ARM::VST1d64TPseudo:
    1160             :   case ARM::VST1d64QPseudo:
    1161          24 :     if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
    1162           0 :       FrameIndex = MI.getOperand(0).getIndex();
    1163           0 :       return MI.getOperand(2).getReg();
    1164             :     }
    1165             :     break;
    1166           0 :   case ARM::VSTMQIA:
    1167           0 :     if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
    1168           0 :       FrameIndex = MI.getOperand(1).getIndex();
    1169           0 :       return MI.getOperand(0).getReg();
    1170             :     }
    1171             :     break;
    1172             :   }
    1173             : 
    1174             :   return 0;
    1175             : }
    1176             : 
    1177      135897 : unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
    1178             :                                                     int &FrameIndex) const {
    1179             :   SmallVector<const MachineMemOperand *, 1> Accesses;
    1180      135897 :   if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
    1181        2780 :     FrameIndex =
    1182        2780 :         cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
    1183        2780 :             ->getFrameIndex();
    1184        2780 :     return true;
    1185             :   }
    1186             :   return false;
    1187             : }
    1188             : 
    1189        2029 : void ARMBaseInstrInfo::
    1190             : loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    1191             :                      unsigned DestReg, int FI,
    1192             :                      const TargetRegisterClass *RC,
    1193             :                      const TargetRegisterInfo *TRI) const {
    1194        2029 :   DebugLoc DL;
    1195        2029 :   if (I != MBB.end()) DL = I->getDebugLoc();
    1196        2029 :   MachineFunction &MF = *MBB.getParent();
    1197        2029 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    1198             :   unsigned Align = MFI.getObjectAlignment(FI);
    1199        2029 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
    1200             :       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
    1201             :       MFI.getObjectSize(FI), Align);
    1202             : 
    1203        2029 :   switch (TRI->getSpillSize(*RC)) {
    1204           2 :   case 2:
    1205           4 :     if (ARM::HPRRegClass.hasSubClassEq(RC)) {
    1206           4 :       BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
    1207             :           .addFrameIndex(FI)
    1208             :           .addImm(0)
    1209             :           .addMemOperand(MMO)
    1210           2 :           .add(predOps(ARMCC::AL));
    1211             :     } else
    1212           0 :       llvm_unreachable("Unknown reg class!");
    1213           2 :     break;
    1214        1202 :   case 4:
    1215        2404 :     if (ARM::GPRRegClass.hasSubClassEq(RC)) {
    1216        2372 :       BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
    1217             :           .addFrameIndex(FI)
    1218             :           .addImm(0)
    1219             :           .addMemOperand(MMO)
    1220        1186 :           .add(predOps(ARMCC::AL));
    1221          32 :     } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
    1222          32 :       BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
    1223             :           .addFrameIndex(FI)
    1224             :           .addImm(0)
    1225             :           .addMemOperand(MMO)
    1226          16 :           .add(predOps(ARMCC::AL));
    1227             :     } else
    1228           0 :       llvm_unreachable("Unknown reg class!");
    1229             :     break;
    1230         435 :   case 8:
    1231         870 :     if (ARM::DPRRegClass.hasSubClassEq(RC)) {
    1232         862 :       BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
    1233             :           .addFrameIndex(FI)
    1234             :           .addImm(0)
    1235             :           .addMemOperand(MMO)
    1236         431 :           .add(predOps(ARMCC::AL));
    1237           8 :     } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
    1238           4 :       MachineInstrBuilder MIB;
    1239             : 
    1240           4 :       if (Subtarget.hasV5TEOps()) {
    1241           4 :         MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
    1242           2 :         AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
    1243           2 :         AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
    1244           2 :         MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
    1245           2 :            .add(predOps(ARMCC::AL));
    1246             :       } else {
    1247             :         // Fallback to LDM instruction, which has existed since the dawn of
    1248             :         // time.
    1249           4 :         MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
    1250             :                   .addFrameIndex(FI)
    1251             :                   .addMemOperand(MMO)
    1252           2 :                   .add(predOps(ARMCC::AL));
    1253           2 :         MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
    1254           2 :         MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
    1255             :       }
    1256             : 
    1257           4 :       if (TargetRegisterInfo::isPhysicalRegister(DestReg))
    1258           0 :         MIB.addReg(DestReg, RegState::ImplicitDefine);
    1259             :     } else
    1260           0 :       llvm_unreachable("Unknown reg class!");
    1261             :     break;
    1262         387 :   case 16:
    1263         774 :     if (ARM::DPairRegClass.hasSubClassEq(RC)) {
    1264         387 :       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1265         770 :         BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
    1266             :             .addFrameIndex(FI)
    1267             :             .addImm(16)
    1268             :             .addMemOperand(MMO)
    1269         385 :             .add(predOps(ARMCC::AL));
    1270             :       } else {
    1271           4 :         BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
    1272             :             .addFrameIndex(FI)
    1273             :             .addMemOperand(MMO)
    1274           2 :             .add(predOps(ARMCC::AL));
    1275             :       }
    1276             :     } else
    1277           0 :       llvm_unreachable("Unknown reg class!");
    1278             :     break;
    1279           1 :   case 24:
    1280           2 :     if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
    1281           1 :       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1282           0 :         BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
    1283             :             .addFrameIndex(FI)
    1284             :             .addImm(16)
    1285             :             .addMemOperand(MMO)
    1286           0 :             .add(predOps(ARMCC::AL));
    1287             :       } else {
    1288           2 :         MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
    1289             :                                       .addFrameIndex(FI)
    1290             :                                       .addMemOperand(MMO)
    1291           1 :                                       .add(predOps(ARMCC::AL));
    1292           1 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
    1293           1 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
    1294           1 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
    1295           1 :         if (TargetRegisterInfo::isPhysicalRegister(DestReg))
    1296           0 :           MIB.addReg(DestReg, RegState::ImplicitDefine);
    1297             :       }
    1298             :     } else
    1299           0 :       llvm_unreachable("Unknown reg class!");
    1300             :     break;
    1301           0 :    case 32:
    1302           0 :     if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
    1303           0 :       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1304           0 :         BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
    1305             :             .addFrameIndex(FI)
    1306             :             .addImm(16)
    1307             :             .addMemOperand(MMO)
    1308           0 :             .add(predOps(ARMCC::AL));
    1309             :       } else {
    1310           0 :         MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
    1311             :                                       .addFrameIndex(FI)
    1312           0 :                                       .add(predOps(ARMCC::AL))
    1313           0 :                                       .addMemOperand(MMO);
    1314           0 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
    1315           0 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
    1316           0 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
    1317           0 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
    1318           0 :         if (TargetRegisterInfo::isPhysicalRegister(DestReg))
    1319           0 :           MIB.addReg(DestReg, RegState::ImplicitDefine);
    1320             :       }
    1321             :     } else
    1322           0 :       llvm_unreachable("Unknown reg class!");
    1323             :     break;
    1324           2 :   case 64:
    1325           4 :     if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
    1326           4 :       MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
    1327             :                                     .addFrameIndex(FI)
    1328           2 :                                     .add(predOps(ARMCC::AL))
    1329           2 :                                     .addMemOperand(MMO);
    1330           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
    1331           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
    1332           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
    1333           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
    1334           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
    1335           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
    1336           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
    1337           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
    1338           2 :       if (TargetRegisterInfo::isPhysicalRegister(DestReg))
    1339           0 :         MIB.addReg(DestReg, RegState::ImplicitDefine);
    1340             :     } else
    1341           0 :       llvm_unreachable("Unknown reg class!");
    1342           2 :     break;
    1343           0 :   default:
    1344           0 :     llvm_unreachable("Unknown regclass!");
    1345             :   }
    1346        2029 : }
    1347             : 
    1348       35132 : unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
    1349             :                                                int &FrameIndex) const {
    1350       70264 :   switch (MI.getOpcode()) {
    1351             :   default: break;
    1352         159 :   case ARM::LDRrs:
    1353             :   case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
    1354         159 :     if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
    1355         159 :         MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
    1356           0 :         MI.getOperand(3).getImm() == 0) {
    1357           0 :       FrameIndex = MI.getOperand(1).getIndex();
    1358           0 :       return MI.getOperand(0).getReg();
    1359             :     }
    1360             :     break;
    1361        8260 :   case ARM::LDRi12:
    1362             :   case ARM::t2LDRi12:
    1363             :   case ARM::tLDRspi:
    1364             :   case ARM::VLDRD:
    1365             :   case ARM::VLDRS:
    1366       16520 :     if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
    1367        4745 :         MI.getOperand(2).getImm() == 0) {
    1368        2963 :       FrameIndex = MI.getOperand(1).getIndex();
    1369        2963 :       return MI.getOperand(0).getReg();
    1370             :     }
    1371             :     break;
    1372           8 :   case ARM::VLD1q64:
    1373             :   case ARM::VLD1d8TPseudo:
    1374             :   case ARM::VLD1d16TPseudo:
    1375             :   case ARM::VLD1d32TPseudo:
    1376             :   case ARM::VLD1d64TPseudo:
    1377             :   case ARM::VLD1d8QPseudo:
    1378             :   case ARM::VLD1d16QPseudo:
    1379             :   case ARM::VLD1d32QPseudo:
    1380             :   case ARM::VLD1d64QPseudo:
    1381          16 :     if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
    1382           2 :       FrameIndex = MI.getOperand(1).getIndex();
    1383           2 :       return MI.getOperand(0).getReg();
    1384             :     }
    1385             :     break;
    1386           0 :   case ARM::VLDMQIA:
    1387           0 :     if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
    1388           0 :       FrameIndex = MI.getOperand(1).getIndex();
    1389           0 :       return MI.getOperand(0).getReg();
    1390             :     }
    1391             :     break;
    1392             :   }
    1393             : 
    1394             :   return 0;
    1395             : }
    1396             : 
    1397      139283 : unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
    1398             :                                                      int &FrameIndex) const {
    1399             :   SmallVector<const MachineMemOperand *, 1> Accesses;
    1400      139283 :   if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
    1401        3398 :     FrameIndex =
    1402        3398 :         cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
    1403        3398 :             ->getFrameIndex();
    1404        3398 :     return true;
    1405             :   }
    1406             :   return false;
    1407             : }
    1408             : 
    1409             : /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
    1410             : /// depending on whether the result is used.
    1411          56 : void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
    1412          56 :   bool isThumb1 = Subtarget.isThumb1Only();
    1413             :   bool isThumb2 = Subtarget.isThumb2();
    1414          56 :   const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
    1415             : 
    1416             :   DebugLoc dl = MI->getDebugLoc();
    1417          56 :   MachineBasicBlock *BB = MI->getParent();
    1418             : 
    1419          56 :   MachineInstrBuilder LDM, STM;
    1420          56 :   if (isThumb1 || !MI->getOperand(1).isDead()) {
    1421          41 :     MachineOperand LDWb(MI->getOperand(1));
    1422          75 :     LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
    1423             :                                                  : isThumb1 ? ARM::tLDMIA_UPD
    1424         109 :                                                             : ARM::LDMIA_UPD))
    1425             :               .add(LDWb);
    1426             :   } else {
    1427          31 :     LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
    1428             :   }
    1429             : 
    1430          56 :   if (isThumb1 || !MI->getOperand(0).isDead()) {
    1431          41 :     MachineOperand STWb(MI->getOperand(0));
    1432          75 :     STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
    1433             :                                                  : isThumb1 ? ARM::tSTMIA_UPD
    1434         109 :                                                             : ARM::STMIA_UPD))
    1435             :               .add(STWb);
    1436             :   } else {
    1437          31 :     STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
    1438             :   }
    1439             : 
    1440          56 :   MachineOperand LDBase(MI->getOperand(3));
    1441          56 :   LDM.add(LDBase).add(predOps(ARMCC::AL));
    1442             : 
    1443          56 :   MachineOperand STBase(MI->getOperand(2));
    1444          56 :   STM.add(STBase).add(predOps(ARMCC::AL));
    1445             : 
    1446             :   // Sort the scratch registers into ascending order.
    1447          56 :   const TargetRegisterInfo &TRI = getRegisterInfo();
    1448             :   SmallVector<unsigned, 6> ScratchRegs;
    1449         285 :   for(unsigned I = 5; I < MI->getNumOperands(); ++I)
    1450         458 :     ScratchRegs.push_back(MI->getOperand(I).getReg());
    1451             :   llvm::sort(ScratchRegs,
    1452             :              [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
    1453         173 :                return TRI.getEncodingValue(Reg1) <
    1454           0 :                       TRI.getEncodingValue(Reg2);
    1455             :              });
    1456             : 
    1457         285 :   for (const auto &Reg : ScratchRegs) {
    1458         229 :     LDM.addReg(Reg, RegState::Define);
    1459         229 :     STM.addReg(Reg, RegState::Kill);
    1460             :   }
    1461             : 
    1462          56 :   BB->erase(MI);
    1463          56 : }
    1464             : 
    1465       51906 : bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    1466      103812 :   if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
    1467             :     assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
    1468             :            "LOAD_STACK_GUARD currently supported only for MachO.");
    1469         120 :     expandLoadStackGuard(MI);
    1470         120 :     MI.getParent()->erase(MI);
    1471          60 :     return true;
    1472             :   }
    1473             : 
    1474       51846 :   if (MI.getOpcode() == ARM::MEMCPY) {
    1475          56 :     expandMEMCPY(MI);
    1476          56 :     return true;
    1477             :   }
    1478             : 
    1479             :   // This hook gets to expand COPY instructions before they become
    1480             :   // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
    1481             :   // widened to VMOVD.  We prefer the VMOVD when possible because it may be
    1482             :   // changed into a VORR that can go down the NEON pipeline.
    1483       51790 :   if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
    1484             :     return false;
    1485             : 
    1486             :   // Look for a copy between even S-registers.  That is where we keep floats
    1487             :   // when using NEON v2f32 instructions for f32 arithmetic.
    1488       11766 :   unsigned DstRegS = MI.getOperand(0).getReg();
    1489       11766 :   unsigned SrcRegS = MI.getOperand(1).getReg();
    1490       23532 :   if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    1491             :     return false;
    1492             : 
    1493        1349 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    1494        1349 :   unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
    1495             :                                               &ARM::DPRRegClass);
    1496        1349 :   unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
    1497             :                                               &ARM::DPRRegClass);
    1498        1349 :   if (!DstRegD || !SrcRegD)
    1499             :     return false;
    1500             : 
    1501             :   // We want to widen this into a DstRegD = VMOVD SrcRegD copy.  This is only
    1502             :   // legal if the COPY already defines the full DstRegD, and it isn't a
    1503             :   // sub-register insertion.
    1504         858 :   if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
    1505         835 :     return false;
    1506             : 
    1507             :   // A dead copy shouldn't show up here, but reject it just in case.
    1508          16 :   if (MI.getOperand(0).isDead())
    1509             :     return false;
    1510             : 
    1511             :   // All clear, widen the COPY.
    1512             :   LLVM_DEBUG(dbgs() << "widening:    " << MI);
    1513           8 :   MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    1514             : 
    1515             :   // Get rid of the old implicit-def of DstRegD.  Leave it if it defines a Q-reg
    1516             :   // or some other super-register.
    1517           8 :   int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
    1518           8 :   if (ImpDefIdx != -1)
    1519           1 :     MI.RemoveOperand(ImpDefIdx);
    1520             : 
    1521             :   // Change the opcode and operands.
    1522           8 :   MI.setDesc(get(ARM::VMOVD));
    1523           8 :   MI.getOperand(0).setReg(DstRegD);
    1524          16 :   MI.getOperand(1).setReg(SrcRegD);
    1525           8 :   MIB.add(predOps(ARMCC::AL));
    1526             : 
    1527             :   // We are now reading SrcRegD instead of SrcRegS.  This may upset the
    1528             :   // register scavenger and machine verifier, so we need to indicate that we
    1529             :   // are reading an undefined value from SrcRegD, but a proper value from
    1530             :   // SrcRegS.
    1531           8 :   MI.getOperand(1).setIsUndef();
    1532           8 :   MIB.addReg(SrcRegS, RegState::Implicit);
    1533             : 
    1534             :   // SrcRegD may actually contain an unrelated value in the ssub_1
    1535             :   // sub-register.  Don't kill it.  Only kill the ssub_0 sub-register.
    1536          16 :   if (MI.getOperand(1).isKill()) {
    1537             :     MI.getOperand(1).setIsKill(false);
    1538           7 :     MI.addRegisterKilled(SrcRegS, TRI, true);
    1539             :   }
    1540             : 
    1541             :   LLVM_DEBUG(dbgs() << "replaced by: " << MI);
    1542             :   return true;
    1543             : }
    1544             : 
    1545             : /// Create a copy of a const pool value. Update CPI to the new index and return
    1546             : /// the label UID.
    1547           0 : static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
    1548           0 :   MachineConstantPool *MCP = MF.getConstantPool();
    1549           0 :   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    1550             : 
    1551           0 :   const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
    1552             :   assert(MCPE.isMachineConstantPoolEntry() &&
    1553             :          "Expecting a machine constantpool entry!");
    1554           0 :   ARMConstantPoolValue *ACPV =
    1555             :     static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
    1556             : 
    1557             :   unsigned PCLabelId = AFI->createPICLabelUId();
    1558             :   ARMConstantPoolValue *NewCPV = nullptr;
    1559             : 
    1560             :   // FIXME: The below assumes PIC relocation model and that the function
    1561             :   // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
    1562             :   // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
    1563             :   // instructions, so that's probably OK, but is PIC always correct when
    1564             :   // we get here?
    1565           0 :   if (ACPV->isGlobalValue())
    1566           0 :     NewCPV = ARMConstantPoolConstant::Create(
    1567           0 :         cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
    1568           0 :         4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
    1569           0 :   else if (ACPV->isExtSymbol())
    1570             :     NewCPV = ARMConstantPoolSymbol::
    1571           0 :       Create(MF.getFunction().getContext(),
    1572             :              cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
    1573           0 :   else if (ACPV->isBlockAddress())
    1574             :     NewCPV = ARMConstantPoolConstant::
    1575           0 :       Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
    1576             :              ARMCP::CPBlockAddress, 4);
    1577           0 :   else if (ACPV->isLSDA())
    1578           0 :     NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
    1579             :                                              ARMCP::CPLSDA, 4);
    1580           0 :   else if (ACPV->isMachineBasicBlock())
    1581             :     NewCPV = ARMConstantPoolMBB::
    1582           0 :       Create(MF.getFunction().getContext(),
    1583             :              cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
    1584             :   else
    1585           0 :     llvm_unreachable("Unexpected ARM constantpool value type!!");
    1586           0 :   CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
    1587           0 :   return PCLabelId;
    1588             : }
    1589             : 
    1590        2967 : void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
    1591             :                                      MachineBasicBlock::iterator I,
    1592             :                                      unsigned DestReg, unsigned SubIdx,
    1593             :                                      const MachineInstr &Orig,
    1594             :                                      const TargetRegisterInfo &TRI) const {
    1595        2967 :   unsigned Opcode = Orig.getOpcode();
    1596        2967 :   switch (Opcode) {
    1597        2967 :   default: {
    1598        2967 :     MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
    1599        2967 :     MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
    1600             :     MBB.insert(I, MI);
    1601        2967 :     break;
    1602             :   }
    1603           0 :   case ARM::tLDRpci_pic:
    1604             :   case ARM::t2LDRpci_pic: {
    1605           0 :     MachineFunction &MF = *MBB.getParent();
    1606           0 :     unsigned CPI = Orig.getOperand(1).getIndex();
    1607           0 :     unsigned PCLabelId = duplicateCPV(MF, CPI);
    1608           0 :     BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
    1609             :         .addConstantPoolIndex(CPI)
    1610           0 :         .addImm(PCLabelId)
    1611             :         .cloneMemRefs(Orig);
    1612             :     break;
    1613             :   }
    1614             :   }
    1615        2967 : }
    1616             : 
    1617             : MachineInstr &
    1618         392 : ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
    1619             :     MachineBasicBlock::iterator InsertBefore,
    1620             :     const MachineInstr &Orig) const {
    1621         392 :   MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
    1622         392 :   MachineBasicBlock::instr_iterator I = Cloned.getIterator();
    1623             :   for (;;) {
    1624         792 :     switch (I->getOpcode()) {
    1625           0 :     case ARM::tLDRpci_pic:
    1626             :     case ARM::t2LDRpci_pic: {
    1627           0 :       MachineFunction &MF = *MBB.getParent();
    1628           0 :       unsigned CPI = I->getOperand(1).getIndex();
    1629           0 :       unsigned PCLabelId = duplicateCPV(MF, CPI);
    1630           0 :       I->getOperand(1).setIndex(CPI);
    1631           0 :       I->getOperand(2).setImm(PCLabelId);
    1632             :       break;
    1633             :     }
    1634             :     }
    1635         396 :     if (!I->isBundledWithSucc())
    1636             :       break;
    1637             :     ++I;
    1638             :   }
    1639         392 :   return Cloned;
    1640             : }
    1641             : 
    1642         447 : bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
    1643             :                                         const MachineInstr &MI1,
    1644             :                                         const MachineRegisterInfo *MRI) const {
    1645         447 :   unsigned Opcode = MI0.getOpcode();
    1646         894 :   if (Opcode == ARM::t2LDRpci ||
    1647         447 :       Opcode == ARM::t2LDRpci_pic ||
    1648         892 :       Opcode == ARM::tLDRpci ||
    1649         446 :       Opcode == ARM::tLDRpci_pic ||
    1650         440 :       Opcode == ARM::LDRLIT_ga_pcrel ||
    1651             :       Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
    1652         870 :       Opcode == ARM::tLDRLIT_ga_pcrel ||
    1653         435 :       Opcode == ARM::MOV_ga_pcrel ||
    1654         860 :       Opcode == ARM::MOV_ga_pcrel_ldr ||
    1655         430 :       Opcode == ARM::t2MOV_ga_pcrel) {
    1656         128 :     if (MI1.getOpcode() != Opcode)
    1657             :       return false;
    1658          64 :     if (MI0.getNumOperands() != MI1.getNumOperands())
    1659             :       return false;
    1660             : 
    1661          64 :     const MachineOperand &MO0 = MI0.getOperand(1);
    1662          64 :     const MachineOperand &MO1 = MI1.getOperand(1);
    1663         192 :     if (MO0.getOffset() != MO1.getOffset())
    1664             :       return false;
    1665             : 
    1666         128 :     if (Opcode == ARM::LDRLIT_ga_pcrel ||
    1667          64 :         Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
    1668          64 :         Opcode == ARM::tLDRLIT_ga_pcrel ||
    1669          54 :         Opcode == ARM::MOV_ga_pcrel ||
    1670          53 :         Opcode == ARM::MOV_ga_pcrel_ldr ||
    1671             :         Opcode == ARM::t2MOV_ga_pcrel)
    1672             :       // Ignore the PC labels.
    1673          57 :       return MO0.getGlobal() == MO1.getGlobal();
    1674             : 
    1675           7 :     const MachineFunction *MF = MI0.getParent()->getParent();
    1676           7 :     const MachineConstantPool *MCP = MF->getConstantPool();
    1677           7 :     int CPI0 = MO0.getIndex();
    1678           7 :     int CPI1 = MO1.getIndex();
    1679           7 :     const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    1680           7 :     const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    1681           7 :     bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    1682           7 :     bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    1683           7 :     if (isARMCP0 && isARMCP1) {
    1684           1 :       ARMConstantPoolValue *ACPV0 =
    1685             :         static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
    1686           1 :       ARMConstantPoolValue *ACPV1 =
    1687             :         static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
    1688           1 :       return ACPV0->hasSameValue(ACPV1);
    1689           6 :     } else if (!isARMCP0 && !isARMCP1) {
    1690           6 :       return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    1691             :     }
    1692             :     return false;
    1693         383 :   } else if (Opcode == ARM::PICLDR) {
    1694           0 :     if (MI1.getOpcode() != Opcode)
    1695             :       return false;
    1696           0 :     if (MI0.getNumOperands() != MI1.getNumOperands())
    1697             :       return false;
    1698             : 
    1699           0 :     unsigned Addr0 = MI0.getOperand(1).getReg();
    1700           0 :     unsigned Addr1 = MI1.getOperand(1).getReg();
    1701           0 :     if (Addr0 != Addr1) {
    1702           0 :       if (!MRI ||
    1703           0 :           !TargetRegisterInfo::isVirtualRegister(Addr0) ||
    1704             :           !TargetRegisterInfo::isVirtualRegister(Addr1))
    1705             :         return false;
    1706             : 
    1707             :       // This assumes SSA form.
    1708           0 :       MachineInstr *Def0 = MRI->getVRegDef(Addr0);
    1709           0 :       MachineInstr *Def1 = MRI->getVRegDef(Addr1);
    1710             :       // Check if the loaded value, e.g. a constantpool of a global address, are
    1711             :       // the same.
    1712           0 :       if (!produceSameValue(*Def0, *Def1, MRI))
    1713             :         return false;
    1714             :     }
    1715             : 
    1716           0 :     for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
    1717             :       // %12 = PICLDR %11, 0, 14, %noreg
    1718           0 :       const MachineOperand &MO0 = MI0.getOperand(i);
    1719           0 :       const MachineOperand &MO1 = MI1.getOperand(i);
    1720           0 :       if (!MO0.isIdenticalTo(MO1))
    1721             :         return false;
    1722             :     }
    1723             :     return true;
    1724             :   }
    1725             : 
    1726         383 :   return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
    1727             : }
    1728             : 
    1729             : /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
    1730             : /// determine if two loads are loading from the same base address. It should
    1731             : /// only return true if the base pointers are the same and the only differences
    1732             : /// between the two addresses is the offset. It also returns the offsets by
    1733             : /// reference.
    1734             : ///
    1735             : /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
    1736             : /// is permanently disabled.
    1737      150571 : bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
    1738             :                                                int64_t &Offset1,
    1739             :                                                int64_t &Offset2) const {
    1740             :   // Don't worry about Thumb: just ARM and Thumb2.
    1741      150571 :   if (Subtarget.isThumb1Only()) return false;
    1742             : 
    1743      131191 :   if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    1744             :     return false;
    1745             : 
    1746       94071 :   switch (Load1->getMachineOpcode()) {
    1747             :   default:
    1748             :     return false;
    1749             :   case ARM::LDRi12:
    1750             :   case ARM::LDRBi12:
    1751             :   case ARM::LDRD:
    1752             :   case ARM::LDRH:
    1753             :   case ARM::LDRSB:
    1754             :   case ARM::LDRSH:
    1755             :   case ARM::VLDRD:
    1756             :   case ARM::VLDRS:
    1757             :   case ARM::t2LDRi8:
    1758             :   case ARM::t2LDRBi8:
    1759             :   case ARM::t2LDRDi8:
    1760             :   case ARM::t2LDRSHi8:
    1761             :   case ARM::t2LDRi12:
    1762             :   case ARM::t2LDRBi12:
    1763             :   case ARM::t2LDRSHi12:
    1764             :     break;
    1765             :   }
    1766             : 
    1767       73567 :   switch (Load2->getMachineOpcode()) {
    1768             :   default:
    1769             :     return false;
    1770             :   case ARM::LDRi12:
    1771             :   case ARM::LDRBi12:
    1772             :   case ARM::LDRD:
    1773             :   case ARM::LDRH:
    1774             :   case ARM::LDRSB:
    1775             :   case ARM::LDRSH:
    1776             :   case ARM::VLDRD:
    1777             :   case ARM::VLDRS:
    1778             :   case ARM::t2LDRi8:
    1779             :   case ARM::t2LDRBi8:
    1780             :   case ARM::t2LDRSHi8:
    1781             :   case ARM::t2LDRi12:
    1782             :   case ARM::t2LDRBi12:
    1783             :   case ARM::t2LDRSHi12:
    1784             :     break;
    1785             :   }
    1786             : 
    1787             :   // Check if base addresses and chain operands match.
    1788       65541 :   if (Load1->getOperand(0) != Load2->getOperand(0) ||
    1789             :       Load1->getOperand(4) != Load2->getOperand(4))
    1790             :     return false;
    1791             : 
    1792             :   // Index should be Reg0.
    1793             :   if (Load1->getOperand(3) != Load2->getOperand(3))
    1794             :     return false;
    1795             : 
    1796             :   // Determine the offsets.
    1797             :   if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
    1798             :       isa<ConstantSDNode>(Load2->getOperand(1))) {
    1799       14825 :     Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    1800       29650 :     Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    1801       14825 :     return true;
    1802             :   }
    1803             : 
    1804             :   return false;
    1805             : }
    1806             : 
    1807             : /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
    1808             : /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
    1809             : /// be scheduled togther. On some targets if two loads are loading from
    1810             : /// addresses in the same cache line, it's better if they are scheduled
    1811             : /// together. This function takes two integers that represent the load offsets
    1812             : /// from the common base address. It returns true if it decides it's desirable
    1813             : /// to schedule the two loads together. "NumLoads" is the number of loads that
    1814             : /// have already been scheduled after Load1.
    1815             : ///
    1816             : /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
    1817             : /// is permanently disabled.
    1818        3197 : bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
    1819             :                                                int64_t Offset1, int64_t Offset2,
    1820             :                                                unsigned NumLoads) const {
    1821             :   // Don't worry about Thumb: just ARM and Thumb2.
    1822        3197 :   if (Subtarget.isThumb1Only()) return false;
    1823             : 
    1824             :   assert(Offset2 > Offset1);
    1825             : 
    1826        3197 :   if ((Offset2 - Offset1) / 8 > 64)
    1827             :     return false;
    1828             : 
    1829             :   // Check if the machine opcodes are different. If they are different
    1830             :   // then we consider them to not be of the same base address,
    1831             :   // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
    1832             :   // In this case, they are considered to be the same because they are different
    1833             :   // encoding forms of the same basic instruction.
    1834        3197 :   if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
    1835          24 :       !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
    1836             :          Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
    1837           2 :         (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
    1838             :          Load2->getMachineOpcode() == ARM::t2LDRBi8)))
    1839             :     return false;  // FIXME: overly conservative?
    1840             : 
    1841             :   // Four loads in a row should be sufficient.
    1842        3176 :   if (NumLoads >= 3)
    1843         486 :     return false;
    1844             : 
    1845             :   return true;
    1846             : }
    1847             : 
    1848      101339 : bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
    1849             :                                             const MachineBasicBlock *MBB,
    1850             :                                             const MachineFunction &MF) const {
    1851             :   // Debug info is never a scheduling boundary. It's necessary to be explicit
    1852             :   // due to the special treatment of IT instructions below, otherwise a
    1853             :   // dbg_value followed by an IT will result in the IT instruction being
    1854             :   // considered a scheduling hazard, which is wrong. It should be the actual
    1855             :   // instruction preceding the dbg_value instruction(s), just like it is
    1856             :   // when debug info is not present.
    1857             :   if (MI.isDebugInstr())
    1858             :     return false;
    1859             : 
    1860             :   // Terminators and labels can't be scheduled around.
    1861      101212 :   if (MI.isTerminator() || MI.isPosition())
    1862             :     return true;
    1863             : 
    1864             :   // Treat the start of the IT block as a scheduling boundary, but schedule
    1865             :   // t2IT along with all instructions following it.
    1866             :   // FIXME: This is a big hammer. But the alternative is to add all potential
    1867             :   // true and anti dependencies to IT block instructions as implicit operands
    1868             :   // to the t2IT instruction. The added compile time and complexity does not
    1869             :   // seem worth it.
    1870             :   MachineBasicBlock::const_iterator I = MI;
    1871             :   // Make sure to skip any debug instructions
    1872       74065 :   while (++I != MBB->end() && I->isDebugInstr())
    1873             :     ;
    1874       74016 :   if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
    1875             :     return true;
    1876             : 
    1877             :   // Don't attempt to schedule around any instruction that defines
    1878             :   // a stack-oriented pointer, as it's unlikely to be profitable. This
    1879             :   // saves compile time, because it doesn't require every single
    1880             :   // stack slot reference to depend on the instruction that does the
    1881             :   // modification.
    1882             :   // Calls don't actually change the stack pointer, even if they have imp-defs.
    1883             :   // No ARM calling conventions change the stack pointer. (X86 calling
    1884             :   // conventions sometimes do).
    1885      148032 :   if (!MI.isCall() && MI.definesRegister(ARM::SP))
    1886        7219 :     return true;
    1887             : 
    1888             :   return false;
    1889             : }
    1890             : 
    1891        1487 : bool ARMBaseInstrInfo::
    1892             : isProfitableToIfCvt(MachineBasicBlock &MBB,
    1893             :                     unsigned NumCycles, unsigned ExtraPredCycles,
    1894             :                     BranchProbability Probability) const {
    1895        1487 :   if (!NumCycles)
    1896             :     return false;
    1897             : 
    1898             :   // If we are optimizing for size, see if the branch in the predecessor can be
    1899             :   // lowered to cbn?z by the constant island lowering pass, and return false if
    1900             :   // so. This results in a shorter instruction sequence.
    1901        1487 :   if (MBB.getParent()->getFunction().optForSize()) {
    1902         127 :     MachineBasicBlock *Pred = *MBB.pred_begin();
    1903         127 :     if (!Pred->empty()) {
    1904             :       MachineInstr *LastMI = &*Pred->rbegin();
    1905         254 :       if (LastMI->getOpcode() == ARM::t2Bcc) {
    1906             :         MachineBasicBlock::iterator CmpMI = LastMI;
    1907         118 :         if (CmpMI != Pred->begin()) {
    1908             :           --CmpMI;
    1909         236 :           if (CmpMI->getOpcode() == ARM::tCMPi8 ||
    1910             :               CmpMI->getOpcode() == ARM::t2CMPri) {
    1911          85 :             unsigned Reg = CmpMI->getOperand(0).getReg();
    1912          85 :             unsigned PredReg = 0;
    1913          85 :             ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
    1914          85 :             if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
    1915             :                 isARMLowRegister(Reg))
    1916          43 :               return false;
    1917             :           }
    1918             :         }
    1919             :       }
    1920             :     }
    1921             :   }
    1922        1444 :   return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
    1923        1444 :                              MBB, 0, 0, Probability);
    1924             : }
    1925             : 
    1926        1550 : bool ARMBaseInstrInfo::
    1927             : isProfitableToIfCvt(MachineBasicBlock &TBB,
    1928             :                     unsigned TCycles, unsigned TExtra,
    1929             :                     MachineBasicBlock &FBB,
    1930             :                     unsigned FCycles, unsigned FExtra,
    1931             :                     BranchProbability Probability) const {
    1932        1550 :   if (!TCycles)
    1933             :     return false;
    1934             : 
    1935             :   // Attempt to estimate the relative costs of predication versus branching.
    1936             :   // Here we scale up each component of UnpredCost to avoid precision issue when
    1937             :   // scaling TCycles/FCycles by Probability.
    1938             :   const unsigned ScalingUpFactor = 1024;
    1939             : 
    1940        1550 :   unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
    1941             :   unsigned UnpredCost;
    1942        1550 :   if (!Subtarget.hasBranchPredictor()) {
    1943             :     // When we don't have a branch predictor it's always cheaper to not take a
    1944             :     // branch than take it, so we have to take that into account.
    1945             :     unsigned NotTakenBranchCost = 1;
    1946          24 :     unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
    1947             :     unsigned TUnpredCycles, FUnpredCycles;
    1948          24 :     if (!FCycles) {
    1949             :       // Triangle: TBB is the fallthrough
    1950          20 :       TUnpredCycles = TCycles + NotTakenBranchCost;
    1951             :       FUnpredCycles = TakenBranchCost;
    1952             :     } else {
    1953             :       // Diamond: TBB is the block that is branched to, FBB is the fallthrough
    1954           4 :       TUnpredCycles = TCycles + TakenBranchCost;
    1955           4 :       FUnpredCycles = FCycles + NotTakenBranchCost;
    1956             :       // The branch at the end of FBB will disappear when it's predicated, so
    1957             :       // discount it from PredCost.
    1958           4 :       PredCost -= 1 * ScalingUpFactor;
    1959             :     }
    1960             :     // The total cost is the cost of each path scaled by their probabilites
    1961          24 :     unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
    1962          48 :     unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
    1963          24 :     UnpredCost = TUnpredCost + FUnpredCost;
    1964             :     // When predicating assume that the first IT can be folded away but later
    1965             :     // ones cost one cycle each
    1966          48 :     if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
    1967           5 :       PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
    1968             :     }
    1969             :   } else {
    1970        1526 :     unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
    1971             :     unsigned FUnpredCost =
    1972        3052 :       Probability.getCompl().scale(FCycles * ScalingUpFactor);
    1973        1526 :     UnpredCost = TUnpredCost + FUnpredCost;
    1974        1526 :     UnpredCost += 1 * ScalingUpFactor; // The branch itself
    1975        1526 :     UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
    1976             :   }
    1977             : 
    1978        1550 :   return PredCost <= UnpredCost;
    1979             : }
    1980             : 
    1981             : bool
    1982          73 : ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
    1983             :                                             MachineBasicBlock &FMBB) const {
    1984             :   // Reduce false anti-dependencies to let the target's out-of-order execution
    1985             :   // engine do its thing.
    1986          73 :   return Subtarget.isProfitableToUnpredicate();
    1987             : }
    1988             : 
    1989             : /// getInstrPredicate - If instruction is predicated, returns its predicate
    1990             : /// condition, otherwise returns AL. It also returns the condition code
    1991             : /// register by reference.
    1992       93866 : ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
    1993             :                                          unsigned &PredReg) {
    1994       93866 :   int PIdx = MI.findFirstPredOperandIdx();
    1995       93866 :   if (PIdx == -1) {
    1996       14087 :     PredReg = 0;
    1997       14087 :     return ARMCC::AL;
    1998             :   }
    1999             : 
    2000       79779 :   PredReg = MI.getOperand(PIdx+1).getReg();
    2001      159558 :   return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
    2002             : }
    2003             : 
    2004           0 : unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
    2005           0 :   if (Opc == ARM::B)
    2006             :     return ARM::Bcc;
    2007           0 :   if (Opc == ARM::tB)
    2008             :     return ARM::tBcc;
    2009           0 :   if (Opc == ARM::t2B)
    2010             :     return ARM::t2Bcc;
    2011             : 
    2012           0 :   llvm_unreachable("Unknown unconditional branch opcode!");
    2013             : }
    2014             : 
    2015       10608 : MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
    2016             :                                                        bool NewMI,
    2017             :                                                        unsigned OpIdx1,
    2018             :                                                        unsigned OpIdx2) const {
    2019       21216 :   switch (MI.getOpcode()) {
    2020        1011 :   case ARM::MOVCCr:
    2021             :   case ARM::t2MOVCCr: {
    2022             :     // MOVCC can be commuted by inverting the condition.
    2023        1011 :     unsigned PredReg = 0;
    2024        1011 :     ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
    2025             :     // MOVCC AL can't be inverted. Shouldn't happen.
    2026        1011 :     if (CC == ARMCC::AL || PredReg != ARM::CPSR)
    2027             :       return nullptr;
    2028             :     MachineInstr *CommutedMI =
    2029        1011 :         TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
    2030        1011 :     if (!CommutedMI)
    2031             :       return nullptr;
    2032             :     // After swapping the MOVCC operands, also invert the condition.
    2033        1011 :     CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
    2034        1011 :         .setImm(ARMCC::getOppositeCondition(CC));
    2035        1011 :     return CommutedMI;
    2036             :   }
    2037             :   }
    2038        9597 :   return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
    2039             : }
    2040             : 
    2041             : /// Identify instructions that can be folded into a MOVCC instruction, and
    2042             : /// return the defining instruction.
    2043           0 : static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
    2044             :                                       const MachineRegisterInfo &MRI,
    2045             :                                       const TargetInstrInfo *TII) {
    2046           0 :   if (!TargetRegisterInfo::isVirtualRegister(Reg))
    2047           0 :     return nullptr;
    2048           0 :   if (!MRI.hasOneNonDBGUse(Reg))
    2049           0 :     return nullptr;
    2050           0 :   MachineInstr *MI = MRI.getVRegDef(Reg);
    2051           0 :   if (!MI)
    2052           0 :     return nullptr;
    2053             :   // MI is folded into the MOVCC by predicating it.
    2054           0 :   if (!MI->isPredicable())
    2055           0 :     return nullptr;
    2056             :   // Check if MI has any non-dead defs or physreg uses. This also detects
    2057             :   // predicated instructions which will be reading CPSR.
    2058           0 :   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    2059           0 :     const MachineOperand &MO = MI->getOperand(i);
    2060             :     // Reject frame index operands, PEI can't handle the predicated pseudos.
    2061           0 :     if (MO.isFI() || MO.isCPI() || MO.isJTI())
    2062           0 :       return nullptr;
    2063           0 :     if (!MO.isReg())
    2064           0 :       continue;
    2065             :     // MI can't have any tied operands, that would conflict with predication.
    2066           0 :     if (MO.isTied())
    2067           0 :       return nullptr;
    2068           0 :     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
    2069           0 :       return nullptr;
    2070           0 :     if (MO.isDef() && !MO.isDead())
    2071           0 :       return nullptr;
    2072             :   }
    2073           0 :   bool DontMoveAcrossStores = true;
    2074           0 :   if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
    2075           0 :     return nullptr;
    2076             :   return MI;
    2077             : }
    2078             : 
    2079         467 : bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
    2080             :                                      SmallVectorImpl<MachineOperand> &Cond,
    2081             :                                      unsigned &TrueOp, unsigned &FalseOp,
    2082             :                                      bool &Optimizable) const {
    2083             :   assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
    2084             :          "Unknown select instruction");
    2085             :   // MOVCC operands:
    2086             :   // 0: Def.
    2087             :   // 1: True use.
    2088             :   // 2: False use.
    2089             :   // 3: Condition code.
    2090             :   // 4: CPSR use.
    2091         467 :   TrueOp = 1;
    2092         467 :   FalseOp = 2;
    2093         934 :   Cond.push_back(MI.getOperand(3));
    2094         934 :   Cond.push_back(MI.getOperand(4));
    2095             :   // We can always fold a def.
    2096         467 :   Optimizable = true;
    2097         467 :   return false;
    2098             : }
    2099             : 
    2100             : MachineInstr *
    2101         467 : ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
    2102             :                                  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
    2103             :                                  bool PreferFalse) const {
    2104             :   assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
    2105             :          "Unknown select instruction");
    2106         467 :   MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
    2107         467 :   MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
    2108             :   bool Invert = !DefMI;
    2109         467 :   if (!DefMI)
    2110         397 :     DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
    2111         467 :   if (!DefMI)
    2112             :     return nullptr;
    2113             : 
    2114             :   // Find new register class to use.
    2115         238 :   MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
    2116         168 :   unsigned DestReg = MI.getOperand(0).getReg();
    2117         168 :   const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
    2118         168 :   if (!MRI.constrainRegClass(DestReg, PreviousClass))
    2119             :     return nullptr;
    2120             : 
    2121             :   // Create a new predicated version of DefMI.
    2122             :   // Rfalse is the first use.
    2123             :   MachineInstrBuilder NewMI =
    2124         336 :       BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
    2125             : 
    2126             :   // Copy all the DefMI operands, excluding its (null) predicate.
    2127         168 :   const MCInstrDesc &DefDesc = DefMI->getDesc();
    2128         452 :   for (unsigned i = 1, e = DefDesc.getNumOperands();
    2129         452 :        i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
    2130         284 :     NewMI.add(DefMI->getOperand(i));
    2131             : 
    2132         168 :   unsigned CondCode = MI.getOperand(3).getImm();
    2133         168 :   if (Invert)
    2134          98 :     NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
    2135             :   else
    2136             :     NewMI.addImm(CondCode);
    2137         168 :   NewMI.add(MI.getOperand(4));
    2138             : 
    2139             :   // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
    2140         168 :   if (NewMI->hasOptionalDef())
    2141         156 :     NewMI.add(condCodeOp());
    2142             : 
    2143             :   // The output register value when the predicate is false is an implicit
    2144             :   // register operand tied to the first def.
    2145             :   // The tie makes the register allocator ensure the FalseReg is allocated the
    2146             :   // same register as operand 0.
    2147             :   FalseReg.setImplicit();
    2148             :   NewMI.add(FalseReg);
    2149         168 :   NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
    2150             : 
    2151             :   // Update SeenMIs set: register newly created MI and erase removed DefMI.
    2152         168 :   SeenMIs.insert(NewMI);
    2153             :   SeenMIs.erase(DefMI);
    2154             : 
    2155             :   // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
    2156             :   // DefMI would be invalid when tranferred inside the loop.  Checking for a
    2157             :   // loop is expensive, but at least remove kill flags if they are in different
    2158             :   // BBs.
    2159         168 :   if (DefMI->getParent() != MI.getParent())
    2160          17 :     NewMI->clearKillInfo();
    2161             : 
    2162             :   // The caller will erase MI, but not DefMI.
    2163         168 :   DefMI->eraseFromParent();
    2164         168 :   return NewMI;
    2165             : }
    2166             : 
    2167             : /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
    2168             : /// instruction is encoded with an 'S' bit is determined by the optional CPSR
    2169             : /// def operand.
    2170             : ///
    2171             : /// This will go away once we can teach tblgen how to set the optional CPSR def
    2172             : /// operand itself.
    2173             : struct AddSubFlagsOpcodePair {
    2174             :   uint16_t PseudoOpc;
    2175             :   uint16_t MachineOpc;
    2176             : };
    2177             : 
    2178             : static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
    2179             :   {ARM::ADDSri, ARM::ADDri},
    2180             :   {ARM::ADDSrr, ARM::ADDrr},
    2181             :   {ARM::ADDSrsi, ARM::ADDrsi},
    2182             :   {ARM::ADDSrsr, ARM::ADDrsr},
    2183             : 
    2184             :   {ARM::SUBSri, ARM::SUBri},
    2185             :   {ARM::SUBSrr, ARM::SUBrr},
    2186             :   {ARM::SUBSrsi, ARM::SUBrsi},
    2187             :   {ARM::SUBSrsr, ARM::SUBrsr},
    2188             : 
    2189             :   {ARM::RSBSri, ARM::RSBri},
    2190             :   {ARM::RSBSrsi, ARM::RSBrsi},
    2191             :   {ARM::RSBSrsr, ARM::RSBrsr},
    2192             : 
    2193             :   {ARM::tADDSi3, ARM::tADDi3},
    2194             :   {ARM::tADDSi8, ARM::tADDi8},
    2195             :   {ARM::tADDSrr, ARM::tADDrr},
    2196             :   {ARM::tADCS, ARM::tADC},
    2197             : 
    2198             :   {ARM::tSUBSi3, ARM::tSUBi3},
    2199             :   {ARM::tSUBSi8, ARM::tSUBi8},
    2200             :   {ARM::tSUBSrr, ARM::tSUBrr},
    2201             :   {ARM::tSBCS, ARM::tSBC},
    2202             : 
    2203             :   {ARM::t2ADDSri, ARM::t2ADDri},
    2204             :   {ARM::t2ADDSrr, ARM::t2ADDrr},
    2205             :   {ARM::t2ADDSrs, ARM::t2ADDrs},
    2206             : 
    2207             :   {ARM::t2SUBSri, ARM::t2SUBri},
    2208             :   {ARM::t2SUBSrr, ARM::t2SUBrr},
    2209             :   {ARM::t2SUBSrs, ARM::t2SUBrs},
    2210             : 
    2211             :   {ARM::t2RSBSri, ARM::t2RSBri},
    2212             :   {ARM::t2RSBSrs, ARM::t2RSBrs},
    2213             : };
    2214             : 
    2215     1315052 : unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
    2216    36811350 :   for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
    2217    35496951 :     if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
    2218         653 :       return AddSubFlagsOpcodeMap[i].MachineOpc;
    2219             :   return 0;
    2220             : }
    2221             : 
    2222        2953 : void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
    2223             :                                    MachineBasicBlock::iterator &MBBI,
    2224             :                                    const DebugLoc &dl, unsigned DestReg,
    2225             :                                    unsigned BaseReg, int NumBytes,
    2226             :                                    ARMCC::CondCodes Pred, unsigned PredReg,
    2227             :                                    const ARMBaseInstrInfo &TII,
    2228             :                                    unsigned MIFlags) {
    2229        2953 :   if (NumBytes == 0 && DestReg != BaseReg) {
    2230         616 :     BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
    2231         308 :         .addReg(BaseReg, RegState::Kill)
    2232         308 :         .add(predOps(Pred, PredReg))
    2233         308 :         .add(condCodeOp())
    2234             :         .setMIFlags(MIFlags);
    2235         308 :     return;
    2236             :   }
    2237             : 
    2238             :   bool isSub = NumBytes < 0;
    2239        2645 :   if (isSub) NumBytes = -NumBytes;
    2240             : 
    2241        5346 :   while (NumBytes) {
    2242        2701 :     unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    2243        2701 :     unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
    2244             :     assert(ThisVal && "Didn't extract field correctly");
    2245             : 
    2246             :     // We will handle these bits from offset, clear them.
    2247        2701 :     NumBytes &= ~ThisVal;
    2248             : 
    2249             :     assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
    2250             : 
    2251             :     // Build the new ADD / SUB.
    2252        2701 :     unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
    2253        5402 :     BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
    2254        2701 :         .addReg(BaseReg, RegState::Kill)
    2255        2701 :         .addImm(ThisVal)
    2256        2701 :         .add(predOps(Pred, PredReg))
    2257        2701 :         .add(condCodeOp())
    2258             :         .setMIFlags(MIFlags);
    2259             :     BaseReg = DestReg;
    2260             :   }
    2261             : }
    2262             : 
    2263        3117 : bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
    2264             :                                       MachineFunction &MF, MachineInstr *MI,
    2265             :                                       unsigned NumBytes) {
    2266             :   // This optimisation potentially adds lots of load and store
    2267             :   // micro-operations, it's only really a great benefit to code-size.
    2268        3117 :   if (!MF.getFunction().optForMinSize())
    2269             :     return false;
    2270             : 
    2271             :   // If only one register is pushed/popped, LLVM can use an LDR/STR
    2272             :   // instead. We can't modify those so make sure we're dealing with an
    2273             :   // instruction we understand.
    2274         174 :   bool IsPop = isPopOpcode(MI->getOpcode());
    2275             :   bool IsPush = isPushOpcode(MI->getOpcode());
    2276          87 :   if (!IsPush && !IsPop)
    2277             :     return false;
    2278             : 
    2279          83 :   bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
    2280             :                       MI->getOpcode() == ARM::VLDMDIA_UPD;
    2281          72 :   bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
    2282         153 :                      MI->getOpcode() == ARM::tPOP ||
    2283             :                      MI->getOpcode() == ARM::tPOP_RET;
    2284             : 
    2285             :   assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
    2286             :                           MI->getOperand(1).getReg() == ARM::SP)) &&
    2287             :          "trying to fold sp update into non-sp-updating push/pop");
    2288             : 
    2289             :   // The VFP push & pop act on D-registers, so we can only fold an adjustment
    2290             :   // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
    2291             :   // if this is violated.
    2292         144 :   if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
    2293             :     return false;
    2294             : 
    2295             :   // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
    2296             :   // pred) so the list starts at 4. Thumb1 starts after the predicate.
    2297          81 :   int RegListIdx = IsT1PushPop ? 2 : 4;
    2298             : 
    2299             :   // Calculate the space we'll need in terms of registers.
    2300             :   unsigned RegsNeeded;
    2301             :   const TargetRegisterClass *RegClass;
    2302          81 :   if (IsVFPPushPop) {
    2303          20 :     RegsNeeded = NumBytes / 8;
    2304             :     RegClass = &ARM::DPRRegClass;
    2305             :   } else {
    2306          61 :     RegsNeeded = NumBytes / 4;
    2307             :     RegClass = &ARM::GPRRegClass;
    2308             :   }
    2309             : 
    2310             :   // We're going to have to strip all list operands off before
    2311             :   // re-adding them since the order matters, so save the existing ones
    2312             :   // for later.
    2313             :   SmallVector<MachineOperand, 4> RegList;
    2314             : 
    2315             :   // We're also going to need the first register transferred by this
    2316             :   // instruction, which won't necessarily be the first register in the list.
    2317             :   unsigned FirstRegEnc = -1;
    2318             : 
    2319          81 :   const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
    2320         332 :   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
    2321         251 :     MachineOperand &MO = MI->getOperand(i);
    2322         251 :     RegList.push_back(MO);
    2323             : 
    2324         251 :     if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
    2325             :       FirstRegEnc = TRI->getEncodingValue(MO.getReg());
    2326             :   }
    2327             : 
    2328          81 :   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
    2329             : 
    2330             :   // Now try to find enough space in the reglist to allocate NumBytes.
    2331         396 :   for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
    2332             :        --CurRegEnc) {
    2333         319 :     unsigned CurReg = RegClass->getRegister(CurRegEnc);
    2334         319 :     if (!IsPop) {
    2335             :       // Pushing any register is completely harmless, mark the register involved
    2336             :       // as undef since we don't care about its value and must not restore it
    2337             :       // during stack unwinding.
    2338         157 :       RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
    2339             :                                                   false, false, true));
    2340         157 :       --RegsNeeded;
    2341         157 :       continue;
    2342             :     }
    2343             : 
    2344             :     // However, we can only pop an extra register if it's not live. For
    2345             :     // registers live within the function we might clobber a return value
    2346             :     // register; the other way a register can be live here is if it's
    2347             :     // callee-saved.
    2348         269 :     if (isCalleeSavedRegister(CurReg, CSRegs) ||
    2349         317 :         MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
    2350             :         MachineBasicBlock::LQR_Dead) {
    2351             :       // VFP pops don't allow holes in the register list, so any skip is fatal
    2352             :       // for our transformation. GPR pops do, so we should just keep looking.
    2353          59 :       if (IsVFPPushPop)
    2354             :         return false;
    2355             :       else
    2356             :         continue;
    2357             :     }
    2358             : 
    2359             :     // Mark the unimportant registers as <def,dead> in the POP.
    2360         103 :     RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
    2361             :                                                 true));
    2362         103 :     --RegsNeeded;
    2363             :   }
    2364             : 
    2365          77 :   if (RegsNeeded > 0)
    2366             :     return false;
    2367             : 
    2368             :   // Finally we know we can profitably perform the optimisation so go
    2369             :   // ahead: strip all existing registers off and add them back again
    2370             :   // in the right order.
    2371         207 :   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
    2372         154 :     MI->RemoveOperand(i);
    2373             : 
    2374             :   // Add the complete list back in.
    2375             :   MachineInstrBuilder MIB(MF, &*MI);
    2376         360 :   for (int i = RegList.size() - 1; i >= 0; --i)
    2377         307 :     MIB.add(RegList[i]);
    2378             : 
    2379             :   return true;
    2380             : }
    2381             : 
    2382        8000 : bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
    2383             :                                 unsigned FrameReg, int &Offset,
    2384             :                                 const ARMBaseInstrInfo &TII) {
    2385        8000 :   unsigned Opcode = MI.getOpcode();
    2386             :   const MCInstrDesc &Desc = MI.getDesc();
    2387        8000 :   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
    2388             :   bool isSub = false;
    2389             : 
    2390             :   // Memory operands in inline assembly always use AddrMode2.
    2391        8000 :   if (Opcode == ARM::INLINEASM)
    2392             :     AddrMode = ARMII::AddrMode2;
    2393             : 
    2394        8000 :   if (Opcode == ARM::ADDri) {
    2395        1265 :     Offset += MI.getOperand(FrameRegIdx+1).getImm();
    2396        1265 :     if (Offset == 0) {
    2397             :       // Turn it into a move.
    2398         166 :       MI.setDesc(TII.get(ARM::MOVr));
    2399         166 :       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    2400         166 :       MI.RemoveOperand(FrameRegIdx+1);
    2401         166 :       Offset = 0;
    2402         166 :       return true;
    2403        1099 :     } else if (Offset < 0) {
    2404          51 :       Offset = -Offset;
    2405             :       isSub = true;
    2406          51 :       MI.setDesc(TII.get(ARM::SUBri));
    2407             :     }
    2408             : 
    2409             :     // Common case: small offset, fits into instruction.
    2410        1099 :     if (ARM_AM::getSOImmVal(Offset) != -1) {
    2411             :       // Replace the FrameIndex with sp / fp
    2412         777 :       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    2413        1554 :       MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
    2414         777 :       Offset = 0;
    2415         777 :       return true;
    2416             :     }
    2417             : 
    2418             :     // Otherwise, pull as much of the immedidate into this ADDri/SUBri
    2419             :     // as possible.
    2420         322 :     unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    2421         322 :     unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
    2422             : 
    2423             :     // We will handle these bits from offset, clear them.
    2424         322 :     Offset &= ~ThisImmVal;
    2425             : 
    2426             :     // Get the properly encoded SOImmVal field.
    2427             :     assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
    2428             :            "Bit extraction didn't work?");
    2429         322 :     MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
    2430             :  } else {
    2431             :     unsigned ImmIdx = 0;
    2432             :     int InstrOffs = 0;
    2433             :     unsigned NumBits = 0;
    2434             :     unsigned Scale = 1;
    2435        6735 :     switch (AddrMode) {
    2436        6000 :     case ARMII::AddrMode_i12:
    2437        6000 :       ImmIdx = FrameRegIdx + 1;
    2438        6000 :       InstrOffs = MI.getOperand(ImmIdx).getImm();
    2439             :       NumBits = 12;
    2440        6000 :       break;
    2441           0 :     case ARMII::AddrMode2:
    2442           0 :       ImmIdx = FrameRegIdx+2;
    2443           0 :       InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
    2444             :       if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
    2445           0 :         InstrOffs *= -1;
    2446             :       NumBits = 12;
    2447             :       break;
    2448          70 :     case ARMII::AddrMode3:
    2449          70 :       ImmIdx = FrameRegIdx+2;
    2450         210 :       InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
    2451             :       if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
    2452           0 :         InstrOffs *= -1;
    2453             :       NumBits = 8;
    2454             :       break;
    2455             :     case ARMII::AddrMode4:
    2456             :     case ARMII::AddrMode6:
    2457             :       // Can't fold any offset even if it's zero.
    2458             :       return false;
    2459         614 :     case ARMII::AddrMode5:
    2460         614 :       ImmIdx = FrameRegIdx+1;
    2461        1842 :       InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
    2462             :       if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
    2463           0 :         InstrOffs *= -1;
    2464             :       NumBits = 8;
    2465             :       Scale = 4;
    2466             :       break;
    2467          22 :     case ARMII::AddrMode5FP16:
    2468          22 :       ImmIdx = FrameRegIdx+1;
    2469          66 :       InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
    2470             :       if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
    2471           0 :         InstrOffs *= -1;
    2472             :       NumBits = 8;
    2473             :       Scale = 2;
    2474             :       break;
    2475           0 :     default:
    2476           0 :       llvm_unreachable("Unsupported addressing mode!");
    2477             :     }
    2478             : 
    2479        6706 :     Offset += InstrOffs * Scale;
    2480             :     assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    2481        6706 :     if (Offset < 0) {
    2482         215 :       Offset = -Offset;
    2483             :       isSub = true;
    2484             :     }
    2485             : 
    2486             :     // Attempt to fold address comp. if opcode has offset bits
    2487             :     if (NumBits > 0) {
    2488             :       // Common case: small offset, fits into instruction.
    2489        6706 :       MachineOperand &ImmOp = MI.getOperand(ImmIdx);
    2490        6706 :       int ImmedOffset = Offset / Scale;
    2491        6706 :       unsigned Mask = (1 << NumBits) - 1;
    2492        6706 :       if ((unsigned)Offset <= Mask * Scale) {
    2493             :         // Replace the FrameIndex with sp
    2494        6660 :         MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    2495             :         // FIXME: When addrmode2 goes away, this will simplify (like the
    2496             :         // T2 version), as the LDR.i12 versions don't need the encoding
    2497             :         // tricks for the offset value.
    2498        6660 :         if (isSub) {
    2499         215 :           if (AddrMode == ARMII::AddrMode_i12)
    2500         169 :             ImmedOffset = -ImmedOffset;
    2501             :           else
    2502          46 :             ImmedOffset |= 1 << NumBits;
    2503             :         }
    2504        6660 :         ImmOp.ChangeToImmediate(ImmedOffset);
    2505        6660 :         Offset = 0;
    2506        6660 :         return true;
    2507             :       }
    2508             : 
    2509             :       // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
    2510          46 :       ImmedOffset = ImmedOffset & Mask;
    2511          46 :       if (isSub) {
    2512           0 :         if (AddrMode == ARMII::AddrMode_i12)
    2513           0 :           ImmedOffset = -ImmedOffset;
    2514             :         else
    2515           0 :           ImmedOffset |= 1 << NumBits;
    2516             :       }
    2517          46 :       ImmOp.ChangeToImmediate(ImmedOffset);
    2518          46 :       Offset &= ~(Mask*Scale);
    2519             :     }
    2520             :   }
    2521             : 
    2522         368 :   Offset = (isSub) ? -Offset : Offset;
    2523         368 :   return Offset == 0;
    2524             : }
    2525             : 
    2526             : /// analyzeCompare - For a comparison instruction, return the source registers
    2527             : /// in SrcReg and SrcReg2 if having two register operands, and the value it
    2528             : /// compares against in CmpValue. Return true if the comparison instruction
    2529             : /// can be analyzed.
    2530       37293 : bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
    2531             :                                       unsigned &SrcReg2, int &CmpMask,
    2532             :                                       int &CmpValue) const {
    2533       74586 :   switch (MI.getOpcode()) {
    2534             :   default: break;
    2535        5024 :   case ARM::CMPri:
    2536             :   case ARM::t2CMPri:
    2537             :   case ARM::tCMPi8:
    2538        5024 :     SrcReg = MI.getOperand(0).getReg();
    2539        5024 :     SrcReg2 = 0;
    2540        5024 :     CmpMask = ~0;
    2541        5024 :     CmpValue = MI.getOperand(1).getImm();
    2542        5024 :     return true;
    2543         891 :   case ARM::CMPrr:
    2544             :   case ARM::t2CMPrr:
    2545         891 :     SrcReg = MI.getOperand(0).getReg();
    2546         891 :     SrcReg2 = MI.getOperand(1).getReg();
    2547         891 :     CmpMask = ~0;
    2548         891 :     CmpValue = 0;
    2549         891 :     return true;
    2550         267 :   case ARM::TSTri:
    2551             :   case ARM::t2TSTri:
    2552         267 :     SrcReg = MI.getOperand(0).getReg();
    2553         267 :     SrcReg2 = 0;
    2554         267 :     CmpMask = MI.getOperand(1).getImm();
    2555         267 :     CmpValue = 0;
    2556         267 :     return true;
    2557             :   }
    2558             : 
    2559             :   return false;
    2560             : }
    2561             : 
    2562             : /// isSuitableForMask - Identify a suitable 'and' instruction that
    2563             : /// operates on the given source register and applies the same mask
    2564             : /// as a 'tst' instruction. Provide a limited look-through for copies.
    2565             : /// When successful, MI will hold the found instruction.
    2566             : static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
    2567             :                               int CmpMask, bool CommonUse) {
    2568         194 :   switch (MI->getOpcode()) {
    2569           0 :     case ARM::ANDri:
    2570             :     case ARM::t2ANDri:
    2571           0 :       if (CmpMask != MI->getOperand(2).getImm())
    2572             :         return false;
    2573           0 :       if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
    2574             :         return true;
    2575             :       break;
    2576             :   }
    2577             : 
    2578             :   return false;
    2579             : }
    2580             : 
    2581             : /// getSwappedCondition - assume the flags are set by MI(a,b), return
    2582             : /// the condition code if we modify the instructions such that flags are
    2583             : /// set by MI(b,a).
    2584             : inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
    2585             :   switch (CC) {
    2586             :   default: return ARMCC::AL;
    2587             :   case ARMCC::EQ: return ARMCC::EQ;
    2588             :   case ARMCC::NE: return ARMCC::NE;
    2589             :   case ARMCC::HS: return ARMCC::LS;
    2590             :   case ARMCC::LO: return ARMCC::HI;
    2591             :   case ARMCC::HI: return ARMCC::LO;
    2592             :   case ARMCC::LS: return ARMCC::HS;
    2593             :   case ARMCC::GE: return ARMCC::LE;
    2594             :   case ARMCC::LT: return ARMCC::GT;
    2595             :   case ARMCC::GT: return ARMCC::LT;
    2596             :   case ARMCC::LE: return ARMCC::GE;
    2597             :   }
    2598             : }
    2599             : 
    2600             : /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
    2601             : /// the condition code if we modify the instructions such that flags are
    2602             : /// set by ADD(a,b,X).
    2603             : inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
    2604             :   switch (CC) {
    2605             :   default: return ARMCC::AL;
    2606             :   case ARMCC::HS: return ARMCC::LO;
    2607             :   case ARMCC::LO: return ARMCC::HS;
    2608             :   case ARMCC::VS: return ARMCC::VS;
    2609             :   case ARMCC::VC: return ARMCC::VC;
    2610             :   }
    2611             : }
    2612             : 
    2613             : /// isRedundantFlagInstr - check whether the first instruction, whose only
    2614             : /// purpose is to update flags, can be made redundant.
    2615             : /// CMPrr can be made redundant by SUBrr if the operands are the same.
    2616             : /// CMPri can be made redundant by SUBri if the operands are the same.
    2617             : /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
    2618             : /// This function can be extended later on.
    2619        5833 : inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
    2620             :                                         unsigned SrcReg, unsigned SrcReg2,
    2621             :                                         int ImmValue, const MachineInstr *OI) {
    2622        5833 :   if ((CmpI->getOpcode() == ARM::CMPrr ||
    2623        1376 :        CmpI->getOpcode() == ARM::t2CMPrr) &&
    2624        1376 :       (OI->getOpcode() == ARM::SUBrr ||
    2625        5833 :        OI->getOpcode() == ARM::t2SUBrr) &&
    2626          50 :       ((OI->getOperand(1).getReg() == SrcReg &&
    2627          50 :         OI->getOperand(2).getReg() == SrcReg2) ||
    2628          14 :        (OI->getOperand(1).getReg() == SrcReg2 &&
    2629          14 :         OI->getOperand(2).getReg() == SrcReg)))
    2630             :     return true;
    2631             : 
    2632        4348 :   if ((CmpI->getOpcode() == ARM::CMPri ||
    2633        3832 :        CmpI->getOpcode() == ARM::t2CMPri) &&
    2634        3832 :       (OI->getOpcode() == ARM::SUBri ||
    2635         239 :        OI->getOpcode() == ARM::t2SUBri) &&
    2636        6040 :       OI->getOperand(1).getReg() == SrcReg &&
    2637          18 :       OI->getOperand(2).getImm() == ImmValue)
    2638             :     return true;
    2639             : 
    2640        5792 :   if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
    2641        1344 :       (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
    2642        1344 :        OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
    2643          80 :       OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
    2644        5867 :       OI->getOperand(0).getReg() == SrcReg &&
    2645          41 :       OI->getOperand(1).getReg() == SrcReg2)
    2646          10 :     return true;
    2647             :   return false;
    2648             : }
    2649             : 
    2650        1464 : static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
    2651        2928 :   switch (MI->getOpcode()) {
    2652             :   default: return false;
    2653          66 :   case ARM::tLSLri:
    2654             :   case ARM::tLSRri:
    2655             :   case ARM::tLSLrr:
    2656             :   case ARM::tLSRrr:
    2657             :   case ARM::tSUBrr:
    2658             :   case ARM::tADDrr:
    2659             :   case ARM::tADDi3:
    2660             :   case ARM::tADDi8:
    2661             :   case ARM::tSUBi3:
    2662             :   case ARM::tSUBi8:
    2663             :   case ARM::tMUL:
    2664          66 :     IsThumb1 = true;
    2665             :     LLVM_FALLTHROUGH;
    2666             :   case ARM::RSBrr:
    2667             :   case ARM::RSBri:
    2668             :   case ARM::RSCrr:
    2669             :   case ARM::RSCri:
    2670             :   case ARM::ADDrr:
    2671             :   case ARM::ADDri:
    2672             :   case ARM::ADCrr:
    2673             :   case ARM::ADCri:
    2674             :   case ARM::SUBrr:
    2675             :   case ARM::SUBri:
    2676             :   case ARM::SBCrr:
    2677             :   case ARM::SBCri:
    2678             :   case ARM::t2RSBri:
    2679             :   case ARM::t2ADDrr:
    2680             :   case ARM::t2ADDri:
    2681             :   case ARM::t2ADCrr:
    2682             :   case ARM::t2ADCri:
    2683             :   case ARM::t2SUBrr:
    2684             :   case ARM::t2SUBri:
    2685             :   case ARM::t2SBCrr:
    2686             :   case ARM::t2SBCri:
    2687             :   case ARM::ANDrr:
    2688             :   case ARM::ANDri:
    2689             :   case ARM::t2ANDrr:
    2690             :   case ARM::t2ANDri:
    2691             :   case ARM::ORRrr:
    2692             :   case ARM::ORRri:
    2693             :   case ARM::t2ORRrr:
    2694             :   case ARM::t2ORRri:
    2695             :   case ARM::EORrr:
    2696             :   case ARM::EORri:
    2697             :   case ARM::t2EORrr:
    2698             :   case ARM::t2EORri:
    2699             :   case ARM::t2LSRri:
    2700             :   case ARM::t2LSRrr:
    2701             :   case ARM::t2LSLri:
    2702             :   case ARM::t2LSLrr:
    2703             :     return true;
    2704             :   }
    2705             : }
    2706             : 
    2707             : /// optimizeCompareInstr - Convert the instruction supplying the argument to the
    2708             : /// comparison into one that sets the zero bit in the flags register;
    2709             : /// Remove a redundant Compare instruction if an earlier instruction can set the
    2710             : /// flags in the same way as Compare.
    2711             : /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
    2712             : /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
    2713             : /// condition code of instructions which use the flags.
    2714        2706 : bool ARMBaseInstrInfo::optimizeCompareInstr(
    2715             :     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    2716             :     int CmpValue, const MachineRegisterInfo *MRI) const {
    2717             :   // Get the unique definition of SrcReg.
    2718        2706 :   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
    2719        2706 :   if (!MI) return false;
    2720             : 
    2721             :   // Masked compares sometimes use the same register as the corresponding 'and'.
    2722        2706 :   if (CmpMask != ~0) {
    2723           0 :     if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
    2724             :       MI = nullptr;
    2725          67 :       for (MachineRegisterInfo::use_instr_iterator
    2726          47 :            UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
    2727         181 :            UI != UE; ++UI) {
    2728          67 :         if (UI->getParent() != CmpInstr.getParent())
    2729             :           continue;
    2730             :         MachineInstr *PotentialAND = &*UI;
    2731           0 :         if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
    2732           0 :             isPredicated(*PotentialAND))
    2733          50 :           continue;
    2734             :         MI = PotentialAND;
    2735             :         break;
    2736             :       }
    2737          47 :       if (!MI) return false;
    2738             :     }
    2739             :   }
    2740             : 
    2741             :   // Get ready to iterate backward from CmpInstr.
    2742             :   MachineBasicBlock::iterator I = CmpInstr, E = MI,
    2743        2659 :                               B = CmpInstr.getParent()->begin();
    2744             : 
    2745             :   // Early exit if CmpInstr is at the beginning of the BB.
    2746        2659 :   if (I == B) return false;
    2747             : 
    2748             :   // There are two possible candidates which can be changed to set CPSR:
    2749             :   // One is MI, the other is a SUB or ADD instruction.
    2750             :   // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
    2751             :   // ADDr[ri](r1, r2, X).
    2752             :   // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
    2753             :   MachineInstr *SubAdd = nullptr;
    2754        2490 :   if (SrcReg2 != 0)
    2755             :     // MI is not a candidate for CMPrr.
    2756             :     MI = nullptr;
    2757        2026 :   else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
    2758             :     // Conservatively refuse to convert an instruction which isn't in the same
    2759             :     // BB as the comparison.
    2760             :     // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
    2761             :     // Thus we cannot return here.
    2762        1124 :     if (CmpInstr.getOpcode() == ARM::CMPri ||
    2763             :         CmpInstr.getOpcode() == ARM::t2CMPri)
    2764             :       MI = nullptr;
    2765             :     else
    2766             :       return false;
    2767             :   }
    2768             : 
    2769         960 :   bool IsThumb1 = false;
    2770        1464 :   if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
    2771             :     return false;
    2772             : 
    2773             :   // We also want to do this peephole for cases like this: if (a*b == 0),
    2774             :   // and optimise away the CMP instruction from the generated code sequence:
    2775             :   // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
    2776             :   // resulting from the select instruction, but these MOVS instructions for
    2777             :   // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
    2778             :   // However, if we only have MOVS instructions in between the CMP and the
    2779             :   // other instruction (the MULS in this example), then the CPSR is dead so we
    2780             :   // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
    2781             :   // reordering and then continue the analysis hoping we can eliminate the
    2782             :   // CMP. This peephole works on the vregs, so is still in SSA form. As a
    2783             :   // consequence, the movs won't redefine/kill the MUL operands which would
    2784             :   // make this reordering illegal.
    2785        1317 :   if (MI && IsThumb1) {
    2786             :     --I;
    2787             :     bool CanReorder = true;
    2788             :     const bool HasStmts = I != E;
    2789          70 :     for (; I != E; --I) {
    2790          14 :       if (I->getOpcode() != ARM::tMOVi8) {
    2791             :         CanReorder = false;
    2792             :         break;
    2793             :       }
    2794             :     }
    2795          66 :     if (HasStmts && CanReorder) {
    2796           2 :       MI = MI->removeFromParent();
    2797             :       E = CmpInstr;
    2798           2 :       CmpInstr.getParent()->insert(E, MI);
    2799             :     }
    2800          66 :     I = CmpInstr;
    2801             :     E = MI;
    2802             :   }
    2803             : 
    2804             :   // Check that CPSR isn't set between the comparison instruction and the one we
    2805             :   // want to change. At the same time, search for SubAdd.
    2806        1317 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    2807             :   do {
    2808             :     const MachineInstr &Instr = *--I;
    2809             : 
    2810             :     // Check whether CmpInstr can be made redundant by the current instruction.
    2811        2357 :     if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
    2812             :       SubAdd = &*I;
    2813             :       break;
    2814             :     }
    2815             : 
    2816             :     // Allow E (which was initially MI) to be SubAdd but do not search before E.
    2817        2316 :     if (I == E)
    2818             :       break;
    2819             : 
    2820        2354 :     if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
    2821             :         Instr.readsRegister(ARM::CPSR, TRI))
    2822             :       // This instruction modifies or uses CPSR after the one we want to
    2823             :       // change. We can't do this transformation.
    2824          79 :       return false;
    2825             : 
    2826        1111 :   } while (I != B);
    2827             : 
    2828             :   // Return false if no candidates exist.
    2829        1238 :   if (!MI && !SubAdd)
    2830             :     return false;
    2831             : 
    2832             :   // The single candidate is called MI.
    2833         394 :   if (!MI) MI = SubAdd;
    2834             : 
    2835             :   // We can't use a predicated instruction - it doesn't always write the flags.
    2836         394 :   if (isPredicated(*MI))
    2837             :     return false;
    2838             : 
    2839             :   // Scan forward for the use of CPSR
    2840             :   // When checking against MI: if it's a conditional code that requires
    2841             :   // checking of the V bit or C bit, then this is not safe to do.
    2842             :   // It is safe to remove CmpInstr if CPSR is redefined or killed.
    2843             :   // If we are done with the basic block, we need to check whether CPSR is
    2844             :   // live-out.
    2845             :   SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
    2846             :       OperandsToUpdate;
    2847             :   bool isSafe = false;
    2848         390 :   I = CmpInstr;
    2849         390 :   E = CmpInstr.getParent()->end();
    2850        2586 :   while (!isSafe && ++I != E) {
    2851             :     const MachineInstr &Instr = *I;
    2852        3856 :     for (unsigned IO = 0, EO = Instr.getNumOperands();
    2853        3856 :          !isSafe && IO != EO; ++IO) {
    2854        2963 :       const MachineOperand &MO = Instr.getOperand(IO);
    2855        2963 :       if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
    2856             :         isSafe = true;
    2857             :         break;
    2858             :       }
    2859        2957 :       if (!MO.isReg() || MO.getReg() != ARM::CPSR)
    2860             :         continue;
    2861         421 :       if (MO.isDef()) {
    2862             :         isSafe = true;
    2863             :         break;
    2864             :       }
    2865             :       // Condition code is after the operand before CPSR except for VSELs.
    2866             :       ARMCC::CondCodes CC;
    2867             :       bool IsInstrVSel = true;
    2868         814 :       switch (Instr.getOpcode()) {
    2869         403 :       default:
    2870             :         IsInstrVSel = false;
    2871         403 :         CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
    2872         403 :         break;
    2873             :       case ARM::VSELEQD:
    2874             :       case ARM::VSELEQS:
    2875             :         CC = ARMCC::EQ;
    2876             :         break;
    2877           0 :       case ARM::VSELGTD:
    2878             :       case ARM::VSELGTS:
    2879             :         CC = ARMCC::GT;
    2880           0 :         break;
    2881           0 :       case ARM::VSELGED:
    2882             :       case ARM::VSELGES:
    2883             :         CC = ARMCC::GE;
    2884           0 :         break;
    2885           0 :       case ARM::VSELVSS:
    2886             :       case ARM::VSELVSD:
    2887             :         CC = ARMCC::VS;
    2888           0 :         break;
    2889             :       }
    2890             : 
    2891         407 :       if (SubAdd) {
    2892             :         // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
    2893             :         // on CMP needs to be updated to be based on SUB.
    2894             :         // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
    2895             :         // needs to be modified.
    2896             :         // Push the condition code operands to OperandsToUpdate.
    2897             :         // If it is safe to remove CmpInstr, the condition code of these
    2898             :         // operands will be modified.
    2899          47 :         unsigned Opc = SubAdd->getOpcode();
    2900          47 :         bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
    2901          47 :                      Opc == ARM::SUBri || Opc == ARM::t2SUBri;
    2902          41 :         if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
    2903          13 :                        SubAdd->getOperand(2).getReg() == SrcReg)) {
    2904             :           // VSel doesn't support condition code update.
    2905          19 :           if (IsInstrVSel)
    2906             :             return false;
    2907             :           // Ensure we can swap the condition.
    2908          19 :           ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
    2909          19 :           if (NewCC == ARMCC::AL)
    2910             :             return false;
    2911          19 :           OperandsToUpdate.push_back(
    2912          38 :               std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
    2913             :         }
    2914             :       } else {
    2915             :         // No SubAdd, so this is x = <op> y, z; cmp x, 0.
    2916             :         switch (CC) {
    2917             :         case ARMCC::EQ: // Z
    2918             :         case ARMCC::NE: // Z
    2919             :         case ARMCC::MI: // N
    2920             :         case ARMCC::PL: // N
    2921             :         case ARMCC::AL: // none
    2922             :           // CPSR can be used multiple times, we should continue.
    2923             :           break;
    2924             :         case ARMCC::HS: // C
    2925             :         case ARMCC::LO: // C
    2926             :         case ARMCC::VS: // V
    2927             :         case ARMCC::VC: // V
    2928             :         case ARMCC::HI: // C Z
    2929             :         case ARMCC::LS: // C Z
    2930             :         case ARMCC::GE: // N V
    2931             :         case ARMCC::LT: // N V
    2932             :         case ARMCC::GT: // Z N V
    2933             :         case ARMCC::LE: // Z N V
    2934             :           // The instruction uses the V bit or C bit which is not safe.
    2935             :           return false;
    2936             :         }
    2937             :       }
    2938             :     }
    2939             :   }
    2940             : 
    2941             :   // If CPSR is not killed nor re-defined, we should check whether it is
    2942             :   // live-out. If it is live-out, do not optimize.
    2943         376 :   if (!isSafe) {
    2944         356 :     MachineBasicBlock *MBB = CmpInstr.getParent();
    2945             :     for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
    2946         911 :              SE = MBB->succ_end(); SI != SE; ++SI)
    2947         558 :       if ((*SI)->isLiveIn(ARM::CPSR))
    2948             :         return false;
    2949             :   }
    2950             : 
    2951             :   // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
    2952             :   // set CPSR so this is represented as an explicit output)
    2953         373 :   if (!IsThumb1) {
    2954         616 :     MI->getOperand(5).setReg(ARM::CPSR);
    2955         616 :     MI->getOperand(5).setIsDef(true);
    2956             :   }
    2957             :   assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
    2958         373 :   CmpInstr.eraseFromParent();
    2959             : 
    2960             :   // Modify the condition code of operands in OperandsToUpdate.
    2961             :   // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
    2962             :   // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
    2963         389 :   for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
    2964          32 :     OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
    2965             : 
    2966             :   return true;
    2967             : }
    2968             : 
    2969       40834 : bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
    2970             :   // Do not sink MI if it might be used to optimize a redundant compare.
    2971             :   // We heuristically only look at the instruction immediately following MI to
    2972             :   // avoid potentially searching the entire basic block.
    2973       40834 :   if (isPredicated(MI))
    2974             :     return true;
    2975             :   MachineBasicBlock::const_iterator Next = &MI;
    2976             :   ++Next;
    2977             :   unsigned SrcReg, SrcReg2;
    2978             :   int CmpMask, CmpValue;
    2979       71677 :   if (Next != MI.getParent()->end() &&
    2980       37303 :       analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
    2981        3476 :       isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
    2982          10 :     return false;
    2983             :   return true;
    2984             : }
    2985             : 
    2986        5953 : bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
    2987             :                                      unsigned Reg,
    2988             :                                      MachineRegisterInfo *MRI) const {
    2989             :   // Fold large immediates into add, sub, or, xor.
    2990        5953 :   unsigned DefOpc = DefMI.getOpcode();
    2991        5953 :   if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
    2992             :     return false;
    2993        2672 :   if (!DefMI.getOperand(1).isImm())
    2994             :     // Could be t2MOVi32imm @xx
    2995             :     return false;
    2996             : 
    2997         564 :   if (!MRI->hasOneNonDBGUse(Reg))
    2998             :     return false;
    2999             : 
    3000         345 :   const MCInstrDesc &DefMCID = DefMI.getDesc();
    3001         690 :   if (DefMCID.hasOptionalDef()) {
    3002           0 :     unsigned NumOps = DefMCID.getNumOperands();
    3003           0 :     const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
    3004           0 :     if (MO.getReg() == ARM::CPSR && !MO.isDead())
    3005             :       // If DefMI defines CPSR and it is not dead, it's obviously not safe
    3006             :       // to delete DefMI.
    3007             :       return false;
    3008             :   }
    3009             : 
    3010         345 :   const MCInstrDesc &UseMCID = UseMI.getDesc();
    3011         690 :   if (UseMCID.hasOptionalDef()) {
    3012         156 :     unsigned NumOps = UseMCID.getNumOperands();
    3013         312 :     if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
    3014             :       // If the instruction sets the flag, do not attempt this optimization
    3015             :       // since it may change the semantics of the code.
    3016             :       return false;
    3017             :   }
    3018             : 
    3019             :   unsigned UseOpc = UseMI.getOpcode();
    3020             :   unsigned NewUseOpc = 0;
    3021         340 :   uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
    3022             :   uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
    3023             :   bool Commute = false;
    3024         340 :   switch (UseOpc) {
    3025             :   default: return false;
    3026          32 :   case ARM::SUBrr:
    3027             :   case ARM::ADDrr:
    3028             :   case ARM::ORRrr:
    3029             :   case ARM::EORrr:
    3030             :   case ARM::t2SUBrr:
    3031             :   case ARM::t2ADDrr:
    3032             :   case ARM::t2ORRrr:
    3033             :   case ARM::t2EORrr: {
    3034          32 :     Commute = UseMI.getOperand(2).getReg() != Reg;
    3035             :     switch (UseOpc) {
    3036             :     default: break;
    3037           9 :     case ARM::ADDrr:
    3038             :     case ARM::SUBrr:
    3039           9 :       if (UseOpc == ARM::SUBrr && Commute)
    3040             :         return false;
    3041             : 
    3042             :       // ADD/SUB are special because they're essentially the same operation, so
    3043             :       // we can handle a larger range of immediates.
    3044           9 :       if (ARM_AM::isSOImmTwoPartVal(ImmVal))
    3045           3 :         NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
    3046           6 :       else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
    3047             :         ImmVal = -ImmVal;
    3048           2 :         NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
    3049             :       } else
    3050             :         return false;
    3051             :       SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
    3052             :       SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
    3053           5 :       break;
    3054           2 :     case ARM::ORRrr:
    3055             :     case ARM::EORrr:
    3056           2 :       if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
    3057             :         return false;
    3058             :       SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
    3059             :       SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
    3060             :       switch (UseOpc) {
    3061             :       default: break;
    3062           1 :       case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
    3063           0 :       case ARM::EORrr: NewUseOpc = ARM::EORri; break;
    3064             :       }
    3065             :       break;
    3066          17 :     case ARM::t2ADDrr:
    3067             :     case ARM::t2SUBrr:
    3068          17 :       if (UseOpc == ARM::t2SUBrr && Commute)
    3069             :         return false;
    3070             : 
    3071             :       // ADD/SUB are special because they're essentially the same operation, so
    3072             :       // we can handle a larger range of immediates.
    3073          17 :       if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
    3074           9 :         NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
    3075           8 :       else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
    3076             :         ImmVal = -ImmVal;
    3077           2 :         NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
    3078             :       } else
    3079             :         return false;
    3080          11 :       SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
    3081             :       SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
    3082          11 :       break;
    3083           4 :     case ARM::t2ORRrr:
    3084             :     case ARM::t2EORrr:
    3085           4 :       if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
    3086             :         return false;
    3087           0 :       SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
    3088             :       SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
    3089             :       switch (UseOpc) {
    3090             :       default: break;
    3091           0 :       case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
    3092           0 :       case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
    3093             :       }
    3094             :       break;
    3095             :     }
    3096             :   }
    3097             :   }
    3098             : 
    3099          17 :   unsigned OpIdx = Commute ? 2 : 1;
    3100          17 :   unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
    3101             :   bool isKill = UseMI.getOperand(OpIdx).isKill();
    3102          17 :   unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
    3103          34 :   BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
    3104          34 :           NewReg)
    3105          17 :       .addReg(Reg1, getKillRegState(isKill))
    3106          17 :       .addImm(SOImmValV1)
    3107          17 :       .add(predOps(ARMCC::AL))
    3108          17 :       .add(condCodeOp());
    3109          17 :   UseMI.setDesc(get(NewUseOpc));
    3110          34 :   UseMI.getOperand(1).setReg(NewReg);
    3111          17 :   UseMI.getOperand(1).setIsKill();
    3112          34 :   UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
    3113          17 :   DefMI.eraseFromParent();
    3114          17 :   return true;
    3115             : }
    3116             : 
    3117           0 : static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
    3118             :                                         const MachineInstr &MI) {
    3119           0 :   switch (MI.getOpcode()) {
    3120           0 :   default: {
    3121             :     const MCInstrDesc &Desc = MI.getDesc();
    3122           0 :     int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
    3123             :     assert(UOps >= 0 && "bad # UOps");
    3124           0 :     return UOps;
    3125             :   }
    3126             : 
    3127           0 :   case ARM::LDRrs:
    3128             :   case ARM::LDRBrs:
    3129             :   case ARM::STRrs:
    3130             :   case ARM::STRBrs: {
    3131           0 :     unsigned ShOpVal = MI.getOperand(3).getImm();
    3132             :     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3133             :     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3134           0 :     if (!isSub &&
    3135           0 :         (ShImm == 0 ||
    3136           0 :          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3137             :           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3138           0 :       return 1;
    3139             :     return 2;
    3140             :   }
    3141             : 
    3142           0 :   case ARM::LDRH:
    3143             :   case ARM::STRH: {
    3144           0 :     if (!MI.getOperand(2).getReg())
    3145             :       return 1;
    3146             : 
    3147           0 :     unsigned ShOpVal = MI.getOperand(3).getImm();
    3148             :     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3149             :     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3150           0 :     if (!isSub &&
    3151           0 :         (ShImm == 0 ||
    3152           0 :          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3153             :           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3154           0 :       return 1;
    3155             :     return 2;
    3156             :   }
    3157             : 
    3158           0 :   case ARM::LDRSB:
    3159             :   case ARM::LDRSH:
    3160           0 :     return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
    3161             : 
    3162           0 :   case ARM::LDRSB_POST:
    3163             :   case ARM::LDRSH_POST: {
    3164           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3165           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3166           0 :     return (Rt == Rm) ? 4 : 3;
    3167             :   }
    3168             : 
    3169           0 :   case ARM::LDR_PRE_REG:
    3170             :   case ARM::LDRB_PRE_REG: {
    3171           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3172           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3173           0 :     if (Rt == Rm)
    3174             :       return 3;
    3175           0 :     unsigned ShOpVal = MI.getOperand(4).getImm();
    3176             :     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3177             :     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3178           0 :     if (!isSub &&
    3179           0 :         (ShImm == 0 ||
    3180           0 :          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3181             :           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3182           0 :       return 2;
    3183             :     return 3;
    3184             :   }
    3185             : 
    3186           0 :   case ARM::STR_PRE_REG:
    3187             :   case ARM::STRB_PRE_REG: {
    3188           0 :     unsigned ShOpVal = MI.getOperand(4).getImm();
    3189             :     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3190             :     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3191           0 :     if (!isSub &&
    3192           0 :         (ShImm == 0 ||
    3193           0 :          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3194             :           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3195           0 :       return 2;
    3196             :     return 3;
    3197             :   }
    3198             : 
    3199           0 :   case ARM::LDRH_PRE:
    3200             :   case ARM::STRH_PRE: {
    3201           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3202           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3203           0 :     if (!Rm)
    3204             :       return 2;
    3205           0 :     if (Rt == Rm)
    3206             :       return 3;
    3207           0 :     return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
    3208             :   }
    3209             : 
    3210           0 :   case ARM::LDR_POST_REG:
    3211             :   case ARM::LDRB_POST_REG:
    3212             :   case ARM::LDRH_POST: {
    3213           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3214           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3215           0 :     return (Rt == Rm) ? 3 : 2;
    3216             :   }
    3217             : 
    3218             :   case ARM::LDR_PRE_IMM:
    3219             :   case ARM::LDRB_PRE_IMM:
    3220             :   case ARM::LDR_POST_IMM:
    3221             :   case ARM::LDRB_POST_IMM:
    3222             :   case ARM::STRB_POST_IMM:
    3223             :   case ARM::STRB_POST_REG:
    3224             :   case ARM::STRB_PRE_IMM:
    3225             :   case ARM::STRH_POST:
    3226             :   case ARM::STR_POST_IMM:
    3227             :   case ARM::STR_POST_REG:
    3228             :   case ARM::STR_PRE_IMM:
    3229             :     return 2;
    3230             : 
    3231           0 :   case ARM::LDRSB_PRE:
    3232             :   case ARM::LDRSH_PRE: {
    3233           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3234           0 :     if (Rm == 0)
    3235             :       return 3;
    3236           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3237           0 :     if (Rt == Rm)
    3238             :       return 4;
    3239           0 :     unsigned ShOpVal = MI.getOperand(4).getImm();
    3240             :     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3241             :     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3242           0 :     if (!isSub &&
    3243           0 :         (ShImm == 0 ||
    3244           0 :          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3245             :           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3246           0 :       return 3;
    3247             :     return 4;
    3248             :   }
    3249             : 
    3250           0 :   case ARM::LDRD: {
    3251           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3252           0 :     unsigned Rn = MI.getOperand(2).getReg();
    3253           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3254           0 :     if (Rm)
    3255           0 :       return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
    3256             :                                                                           : 3;
    3257           0 :     return (Rt == Rn) ? 3 : 2;
    3258             :   }
    3259             : 
    3260           0 :   case ARM::STRD: {
    3261           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3262           0 :     if (Rm)
    3263           0 :       return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
    3264             :                                                                           : 3;
    3265             :     return 2;
    3266             :   }
    3267             : 
    3268           0 :   case ARM::LDRD_POST:
    3269             :   case ARM::t2LDRD_POST:
    3270           0 :     return 3;
    3271             : 
    3272           0 :   case ARM::STRD_POST:
    3273             :   case ARM::t2STRD_POST:
    3274           0 :     return 4;
    3275             : 
    3276           0 :   case ARM::LDRD_PRE: {
    3277           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3278           0 :     unsigned Rn = MI.getOperand(3).getReg();
    3279           0 :     unsigned Rm = MI.getOperand(4).getReg();
    3280           0 :     if (Rm)
    3281           0 :       return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
    3282             :                                                                           : 4;
    3283           0 :     return (Rt == Rn) ? 4 : 3;
    3284             :   }
    3285             : 
    3286           0 :   case ARM::t2LDRD_PRE: {
    3287           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3288           0 :     unsigned Rn = MI.getOperand(3).getReg();
    3289           0 :     return (Rt == Rn) ? 4 : 3;
    3290             :   }
    3291             : 
    3292           0 :   case ARM::STRD_PRE: {
    3293           0 :     unsigned Rm = MI.getOperand(4).getReg();
    3294           0 :     if (Rm)
    3295           0 :       return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
    3296             :                                                                           : 4;
    3297             :     return 3;
    3298             :   }
    3299             : 
    3300           0 :   case ARM::t2STRD_PRE:
    3301           0 :     return 3;
    3302             : 
    3303             :   case ARM::t2LDR_POST:
    3304             :   case ARM::t2LDRB_POST:
    3305             :   case ARM::t2LDRB_PRE:
    3306             :   case ARM::t2LDRSBi12:
    3307             :   case ARM::t2LDRSBi8:
    3308             :   case ARM::t2LDRSBpci:
    3309             :   case ARM::t2LDRSBs:
    3310             :   case ARM::t2LDRH_POST:
    3311             :   case ARM::t2LDRH_PRE:
    3312             :   case ARM::t2LDRSBT:
    3313             :   case ARM::t2LDRSB_POST:
    3314             :   case ARM::t2LDRSB_PRE:
    3315             :   case ARM::t2LDRSH_POST:
    3316             :   case ARM::t2LDRSH_PRE:
    3317             :   case ARM::t2LDRSHi12:
    3318             :   case ARM::t2LDRSHi8:
    3319             :   case ARM::t2LDRSHpci:
    3320             :   case ARM::t2LDRSHs:
    3321             :     return 2;
    3322             : 
    3323           0 :   case ARM::t2LDRDi8: {
    3324           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3325           0 :     unsigned Rn = MI.getOperand(2).getReg();
    3326           0 :     return (Rt == Rn) ? 3 : 2;
    3327             :   }
    3328             : 
    3329             :   case ARM::t2STRB_POST:
    3330             :   case ARM::t2STRB_PRE:
    3331             :   case ARM::t2STRBs:
    3332             :   case ARM::t2STRDi8:
    3333             :   case ARM::t2STRH_POST:
    3334             :   case ARM::t2STRH_PRE:
    3335             :   case ARM::t2STRHs:
    3336             :   case ARM::t2STR_POST:
    3337             :   case ARM::t2STR_PRE:
    3338             :   case ARM::t2STRs:
    3339             :     return 2;
    3340             :   }
    3341             : }
    3342             : 
    3343             : // Return the number of 32-bit words loaded by LDM or stored by STM. If this
    3344             : // can't be easily determined return 0 (missing MachineMemOperand).
    3345             : //
    3346             : // FIXME: The current MachineInstr design does not support relying on machine
    3347             : // mem operands to determine the width of a memory access. Instead, we expect
    3348             : // the target to provide this information based on the instruction opcode and
    3349             : // operands. However, using MachineMemOperand is the best solution now for
    3350             : // two reasons:
    3351             : //
    3352             : // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
    3353             : // operands. This is much more dangerous than using the MachineMemOperand
    3354             : // sizes because CodeGen passes can insert/remove optional machine operands. In
    3355             : // fact, it's totally incorrect for preRA passes and appears to be wrong for
    3356             : // postRA passes as well.
    3357             : //
    3358             : // 2) getNumLDMAddresses is only used by the scheduling machine model and any
    3359             : // machine model that calls this should handle the unknown (zero size) case.
    3360             : //
    3361             : // Long term, we should require a target hook that verifies MachineMemOperand
    3362             : // sizes during MC lowering. That target hook should be local to MC lowering
    3363             : // because we can't ensure that it is aware of other MI forms. Doing this will
    3364             : // ensure that MachineMemOperands are correctly propagated through all passes.
    3365        1351 : unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
    3366             :   unsigned Size = 0;
    3367          31 :   for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
    3368             :                                   E = MI.memoperands_end();
    3369        1382 :        I != E; ++I) {
    3370          31 :     Size += (*I)->getSize();
    3371             :   }
    3372        1351 :   return Size / 4;
    3373             : }
    3374             : 
    3375           0 : static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
    3376             :                                                     unsigned NumRegs) {
    3377           0 :   unsigned UOps = 1 + NumRegs; // 1 for address computation.
    3378           0 :   switch (Opc) {
    3379             :   default:
    3380             :     break;
    3381           0 :   case ARM::VLDMDIA_UPD:
    3382             :   case ARM::VLDMDDB_UPD:
    3383             :   case ARM::VLDMSIA_UPD:
    3384             :   case ARM::VLDMSDB_UPD:
    3385             :   case ARM::VSTMDIA_UPD:
    3386             :   case ARM::VSTMDDB_UPD:
    3387             :   case ARM::VSTMSIA_UPD:
    3388             :   case ARM::VSTMSDB_UPD:
    3389             :   case ARM::LDMIA_UPD:
    3390             :   case ARM::LDMDA_UPD:
    3391             :   case ARM::LDMDB_UPD:
    3392             :   case ARM::LDMIB_UPD:
    3393             :   case ARM::STMIA_UPD:
    3394             :   case ARM::STMDA_UPD:
    3395             :   case ARM::STMDB_UPD:
    3396             :   case ARM::STMIB_UPD:
    3397             :   case ARM::tLDMIA_UPD:
    3398             :   case ARM::tSTMIA_UPD:
    3399             :   case ARM::t2LDMIA_UPD:
    3400             :   case ARM::t2LDMDB_UPD:
    3401             :   case ARM::t2STMIA_UPD:
    3402             :   case ARM::t2STMDB_UPD:
    3403           0 :     ++UOps; // One for base register writeback.
    3404           0 :     break;
    3405           0 :   case ARM::LDMIA_RET:
    3406             :   case ARM::tPOP_RET:
    3407             :   case ARM::t2LDMIA_RET:
    3408           0 :     UOps += 2; // One for base reg wb, one for write to pc.
    3409           0 :     break;
    3410             :   }
    3411           0 :   return UOps;
    3412             : }
    3413             : 
    3414        4457 : unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
    3415             :                                           const MachineInstr &MI) const {
    3416        4457 :   if (!ItinData || ItinData->isEmpty())
    3417             :     return 1;
    3418             : 
    3419        4457 :   const MCInstrDesc &Desc = MI.getDesc();
    3420        4457 :   unsigned Class = Desc.getSchedClass();
    3421             :   int ItinUOps = ItinData->getNumMicroOps(Class);
    3422        4457 :   if (ItinUOps >= 0) {
    3423           0 :     if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
    3424           0 :       return getNumMicroOpsSwiftLdSt(ItinData, MI);
    3425             : 
    3426           0 :     return ItinUOps;
    3427             :   }
    3428             : 
    3429             :   unsigned Opc = MI.getOpcode();
    3430        4457 :   switch (Opc) {
    3431           0 :   default:
    3432           0 :     llvm_unreachable("Unexpected multi-uops instruction!");
    3433             :   case ARM::VLDMQIA:
    3434             :   case ARM::VSTMQIA:
    3435             :     return 2;
    3436             : 
    3437             :   // The number of uOps for load / store multiple are determined by the number
    3438             :   // registers.
    3439             :   //
    3440             :   // On Cortex-A8, each pair of register loads / stores can be scheduled on the
    3441             :   // same cycle. The scheduling for the first load / store must be done
    3442             :   // separately by assuming the address is not 64-bit aligned.
    3443             :   //
    3444             :   // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
    3445             :   // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
    3446             :   // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
    3447         414 :   case ARM::VLDMDIA:
    3448             :   case ARM::VLDMDIA_UPD:
    3449             :   case ARM::VLDMDDB_UPD:
    3450             :   case ARM::VLDMSIA:
    3451             :   case ARM::VLDMSIA_UPD:
    3452             :   case ARM::VLDMSDB_UPD:
    3453             :   case ARM::VSTMDIA:
    3454             :   case ARM::VSTMDIA_UPD:
    3455             :   case ARM::VSTMDDB_UPD:
    3456             :   case ARM::VSTMSIA:
    3457             :   case ARM::VSTMSIA_UPD:
    3458             :   case ARM::VSTMSDB_UPD: {
    3459         414 :     unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
    3460         414 :     return (NumRegs / 2) + (NumRegs % 2) + 1;
    3461             :   }
    3462             : 
    3463        4043 :   case ARM::LDMIA_RET:
    3464             :   case ARM::LDMIA:
    3465             :   case ARM::LDMDA:
    3466             :   case ARM::LDMDB:
    3467             :   case ARM::LDMIB:
    3468             :   case ARM::LDMIA_UPD:
    3469             :   case ARM::LDMDA_UPD:
    3470             :   case ARM::LDMDB_UPD:
    3471             :   case ARM::LDMIB_UPD:
    3472             :   case ARM::STMIA:
    3473             :   case ARM::STMDA:
    3474             :   case ARM::STMDB:
    3475             :   case ARM::STMIB:
    3476             :   case ARM::STMIA_UPD:
    3477             :   case ARM::STMDA_UPD:
    3478             :   case ARM::STMDB_UPD:
    3479             :   case ARM::STMIB_UPD:
    3480             :   case ARM::tLDMIA:
    3481             :   case ARM::tLDMIA_UPD:
    3482             :   case ARM::tSTMIA_UPD:
    3483             :   case ARM::tPOP_RET:
    3484             :   case ARM::tPOP:
    3485             :   case ARM::tPUSH:
    3486             :   case ARM::t2LDMIA_RET:
    3487             :   case ARM::t2LDMIA:
    3488             :   case ARM::t2LDMDB:
    3489             :   case ARM::t2LDMIA_UPD:
    3490             :   case ARM::t2LDMDB_UPD:
    3491             :   case ARM::t2STMIA:
    3492             :   case ARM::t2STMDB:
    3493             :   case ARM::t2STMIA_UPD:
    3494             :   case ARM::t2STMDB_UPD: {
    3495        4043 :     unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
    3496        4043 :     switch (Subtarget.getLdStMultipleTiming()) {
    3497           0 :     case ARMSubtarget::SingleIssuePlusExtras:
    3498           0 :       return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
    3499             :     case ARMSubtarget::SingleIssue:
    3500             :       // Assume the worst.
    3501             :       return NumRegs;
    3502         277 :     case ARMSubtarget::DoubleIssue: {
    3503         277 :       if (NumRegs < 4)
    3504             :         return 2;
    3505             :       // 4 registers would be issued: 2, 2.
    3506             :       // 5 registers would be issued: 2, 2, 1.
    3507          97 :       unsigned UOps = (NumRegs / 2);
    3508          97 :       if (NumRegs % 2)
    3509          33 :         ++UOps;
    3510             :       return UOps;
    3511             :     }
    3512          64 :     case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
    3513          64 :       unsigned UOps = (NumRegs / 2);
    3514             :       // If there are odd number of registers or if it's not 64-bit aligned,
    3515             :       // then it takes an extra AGU (Address Generation Unit) cycle.
    3516         114 :       if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
    3517           0 :           (*MI.memoperands_begin())->getAlignment() < 8)
    3518          64 :         ++UOps;
    3519             :       return UOps;
    3520             :       }
    3521           0 :     }
    3522             :   }
    3523             :   }
    3524           0 :   llvm_unreachable("Didn't find the number of microops");
    3525             : }
    3526             : 
    3527             : int
    3528         204 : ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
    3529             :                                   const MCInstrDesc &DefMCID,
    3530             :                                   unsigned DefClass,
    3531             :                                   unsigned DefIdx, unsigned DefAlign) const {
    3532         204 :   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
    3533         204 :   if (RegNo <= 0)
    3534             :     // Def is the address writeback.
    3535             :     return ItinData->getOperandCycle(DefClass, DefIdx);
    3536             : 
    3537             :   int DefCycle;
    3538         204 :   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    3539             :     // (regno / 2) + (regno % 2) + 1
    3540          16 :     DefCycle = RegNo / 2 + 1;
    3541          16 :     if (RegNo % 2)
    3542           7 :       ++DefCycle;
    3543         186 :   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    3544             :     DefCycle = RegNo;
    3545             :     bool isSLoad = false;
    3546             : 
    3547           4 :     switch (DefMCID.getOpcode()) {
    3548             :     default: break;
    3549             :     case ARM::VLDMSIA:
    3550             :     case ARM::VLDMSIA_UPD:
    3551             :     case ARM::VLDMSDB_UPD:
    3552             :       isSLoad = true;
    3553             :       break;
    3554             :     }
    3555             : 
    3556             :     // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    3557             :     // then it takes an extra cycle.
    3558           2 :     if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
    3559           0 :       ++DefCycle;
    3560             :   } else {
    3561             :     // Assume the worst.
    3562         186 :     DefCycle = RegNo + 2;
    3563             :   }
    3564             : 
    3565             :   return DefCycle;
    3566             : }
    3567             : 
    3568           0 : bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
    3569           0 :   unsigned BaseReg = MI.getOperand(0).getReg();
    3570           0 :   for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
    3571             :     const auto &Op = MI.getOperand(i);
    3572           0 :     if (Op.isReg() && Op.getReg() == BaseReg)
    3573             :       return true;
    3574             :   }
    3575             :   return false;
    3576             : }
    3577             : unsigned
    3578           7 : ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
    3579             :   // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops
    3580             :   // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops)
    3581          14 :   return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
    3582             : }
    3583             : 
    3584             : int
    3585         394 : ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
    3586             :                                  const MCInstrDesc &DefMCID,
    3587             :                                  unsigned DefClass,
    3588             :                                  unsigned DefIdx, unsigned DefAlign) const {
    3589         394 :   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
    3590         394 :   if (RegNo <= 0)
    3591             :     // Def is the address writeback.
    3592             :     return ItinData->getOperandCycle(DefClass, DefIdx);
    3593             : 
    3594             :   int DefCycle;
    3595         394 :   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    3596             :     // 4 registers would be issued: 1, 2, 1.
    3597             :     // 5 registers would be issued: 1, 2, 2.
    3598          13 :     DefCycle = RegNo / 2;
    3599          13 :     if (DefCycle < 1)
    3600             :       DefCycle = 1;
    3601             :     // Result latency is issue cycle + 2: E2.
    3602          13 :     DefCycle += 2;
    3603         375 :   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    3604           6 :     DefCycle = (RegNo / 2);
    3605             :     // If there are odd number of registers or if it's not 64-bit aligned,
    3606             :     // then it takes an extra AGU (Address Generation Unit) cycle.
    3607           6 :     if ((RegNo % 2) || DefAlign < 8)
    3608           6 :       ++DefCycle;
    3609             :     // Result latency is AGU cycles + 2.
    3610           6 :     DefCycle += 2;
    3611             :   } else {
    3612             :     // Assume the worst.
    3613         375 :     DefCycle = RegNo + 2;
    3614             :   }
    3615             : 
    3616             :   return DefCycle;
    3617             : }
    3618             : 
    3619             : int
    3620          12 : ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
    3621             :                                   const MCInstrDesc &UseMCID,
    3622             :                                   unsigned UseClass,
    3623             :                                   unsigned UseIdx, unsigned UseAlign) const {
    3624          12 :   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
    3625          12 :   if (RegNo <= 0)
    3626             :     return ItinData->getOperandCycle(UseClass, UseIdx);
    3627             : 
    3628             :   int UseCycle;
    3629          12 :   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    3630             :     // (regno / 2) + (regno % 2) + 1
    3631           6 :     UseCycle = RegNo / 2 + 1;
    3632           6 :     if (RegNo % 2)
    3633           1 :       ++UseCycle;
    3634           3 :   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    3635             :     UseCycle = RegNo;
    3636             :     bool isSStore = false;
    3637             : 
    3638           6 :     switch (UseMCID.getOpcode()) {
    3639             :     default: break;
    3640             :     case ARM::VSTMSIA:
    3641             :     case ARM::VSTMSIA_UPD:
    3642             :     case ARM::VSTMSDB_UPD:
    3643             :       isSStore = true;
    3644             :       break;
    3645             :     }
    3646             : 
    3647             :     // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    3648             :     // then it takes an extra cycle.
    3649           3 :     if ((isSStore && (RegNo % 2)) || UseAlign < 8)
    3650           0 :       ++UseCycle;
    3651             :   } else {
    3652             :     // Assume the worst.
    3653           3 :     UseCycle = RegNo + 2;
    3654             :   }
    3655             : 
    3656             :   return UseCycle;
    3657             : }
    3658             : 
    3659             : int
    3660         286 : ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
    3661             :                                  const MCInstrDesc &UseMCID,
    3662             :                                  unsigned UseClass,
    3663             :                                  unsigned UseIdx, unsigned UseAlign) const {
    3664         286 :   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
    3665         286 :   if (RegNo <= 0)
    3666             :     return ItinData->getOperandCycle(UseClass, UseIdx);
    3667             : 
    3668             :   int UseCycle;
    3669         286 :   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    3670          15 :     UseCycle = RegNo / 2;
    3671          15 :     if (UseCycle < 2)
    3672             :       UseCycle = 2;
    3673             :     // Read in E3.
    3674          15 :     UseCycle += 2;
    3675         271 :   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    3676           0 :     UseCycle = (RegNo / 2);
    3677             :     // If there are odd number of registers or if it's not 64-bit aligned,
    3678             :     // then it takes an extra AGU (Address Generation Unit) cycle.
    3679           0 :     if ((RegNo % 2) || UseAlign < 8)
    3680           0 :       ++UseCycle;
    3681             :   } else {
    3682             :     // Assume the worst.
    3683             :     UseCycle = 1;
    3684             :   }
    3685             :   return UseCycle;
    3686             : }
    3687             : 
    3688             : int
    3689       68207 : ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    3690             :                                     const MCInstrDesc &DefMCID,
    3691             :                                     unsigned DefIdx, unsigned DefAlign,
    3692             :                                     const MCInstrDesc &UseMCID,
    3693             :                                     unsigned UseIdx, unsigned UseAlign) const {
    3694       68207 :   unsigned DefClass = DefMCID.getSchedClass();
    3695       68207 :   unsigned UseClass = UseMCID.getSchedClass();
    3696             : 
    3697      136414 :   if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
    3698       65846 :     return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
    3699             : 
    3700             :   // This may be a def / use of a variable_ops instruction, the operand
    3701             :   // latency might be determinable dynamically. Let the target try to
    3702             :   // figure it out.
    3703             :   int DefCycle = -1;
    3704             :   bool LdmBypass = false;
    3705        4722 :   switch (DefMCID.getOpcode()) {
    3706             :   default:
    3707             :     DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    3708             :     break;
    3709             : 
    3710         204 :   case ARM::VLDMDIA:
    3711             :   case ARM::VLDMDIA_UPD:
    3712             :   case ARM::VLDMDDB_UPD:
    3713             :   case ARM::VLDMSIA:
    3714             :   case ARM::VLDMSIA_UPD:
    3715             :   case ARM::VLDMSDB_UPD:
    3716         204 :     DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    3717         204 :     break;
    3718             : 
    3719         394 :   case ARM::LDMIA_RET:
    3720             :   case ARM::LDMIA:
    3721             :   case ARM::LDMDA:
    3722             :   case ARM::LDMDB:
    3723             :   case ARM::LDMIB:
    3724             :   case ARM::LDMIA_UPD:
    3725             :   case ARM::LDMDA_UPD:
    3726             :   case ARM::LDMDB_UPD:
    3727             :   case ARM::LDMIB_UPD:
    3728             :   case ARM::tLDMIA:
    3729             :   case ARM::tLDMIA_UPD:
    3730             :   case ARM::tPUSH:
    3731             :   case ARM::t2LDMIA_RET:
    3732             :   case ARM::t2LDMIA:
    3733             :   case ARM::t2LDMDB:
    3734             :   case ARM::t2LDMIA_UPD:
    3735             :   case ARM::t2LDMDB_UPD:
    3736             :     LdmBypass = true;
    3737         394 :     DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    3738         394 :     break;
    3739             :   }
    3740             : 
    3741        2361 :   if (DefCycle == -1)
    3742             :     // We can't seem to determine the result latency of the def, assume it's 2.
    3743             :     DefCycle = 2;
    3744             : 
    3745             :   int UseCycle = -1;
    3746        4722 :   switch (UseMCID.getOpcode()) {
    3747             :   default:
    3748             :     UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    3749         499 :     break;
    3750             : 
    3751          12 :   case ARM::VSTMDIA:
    3752             :   case ARM::VSTMDIA_UPD:
    3753             :   case ARM::VSTMDDB_UPD:
    3754             :   case ARM::VSTMSIA:
    3755             :   case ARM::VSTMSIA_UPD:
    3756             :   case ARM::VSTMSDB_UPD:
    3757          12 :     UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    3758          12 :     break;
    3759             : 
    3760         286 :   case ARM::STMIA:
    3761             :   case ARM::STMDA:
    3762             :   case ARM::STMDB:
    3763             :   case ARM::STMIB:
    3764             :   case ARM::STMIA_UPD:
    3765             :   case ARM::STMDA_UPD:
    3766             :   case ARM::STMDB_UPD:
    3767             :   case ARM::STMIB_UPD:
    3768             :   case ARM::tSTMIA_UPD:
    3769             :   case ARM::tPOP_RET:
    3770             :   case ARM::tPOP:
    3771             :   case ARM::t2STMIA:
    3772             :   case ARM::t2STMDB:
    3773             :   case ARM::t2STMIA_UPD:
    3774             :   case ARM::t2STMDB_UPD:
    3775         286 :     UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    3776         286 :     break;
    3777             :   }
    3778             : 
    3779         797 :   if (UseCycle == -1)
    3780             :     // Assume it's read in the first stage.
    3781             :     UseCycle = 1;
    3782             : 
    3783        2361 :   UseCycle = DefCycle - UseCycle + 1;
    3784        2361 :   if (UseCycle > 0) {
    3785        2280 :     if (LdmBypass) {
    3786             :       // It's a variable_ops instruction so we can't use DefIdx here. Just use
    3787             :       // first def operand.
    3788         788 :       if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
    3789             :                                           UseClass, UseIdx))
    3790             :         --UseCycle;
    3791        1886 :     } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
    3792             :                                                UseClass, UseIdx)) {
    3793             :       --UseCycle;
    3794             :     }
    3795             :   }
    3796             : 
    3797             :   return UseCycle;
    3798             : }
    3799             : 
    3800         425 : static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
    3801             :                                            const MachineInstr *MI, unsigned Reg,
    3802             :                                            unsigned &DefIdx, unsigned &Dist) {
    3803         425 :   Dist = 0;
    3804             : 
    3805             :   MachineBasicBlock::const_iterator I = MI; ++I;
    3806             :   MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
    3807             :   assert(II->isInsideBundle() && "Empty bundle?");
    3808             : 
    3809             :   int Idx = -1;
    3810         438 :   while (II->isInsideBundle()) {
    3811         438 :     Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
    3812         438 :     if (Idx != -1)
    3813             :       break;
    3814             :     --II;
    3815          13 :     ++Dist;
    3816             :   }
    3817             : 
    3818             :   assert(Idx != -1 && "Cannot find bundled definition!");
    3819         425 :   DefIdx = Idx;
    3820         425 :   return &*II;
    3821             : }
    3822             : 
    3823        1294 : static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
    3824             :                                            const MachineInstr &MI, unsigned Reg,
    3825             :                                            unsigned &UseIdx, unsigned &Dist) {
    3826        1294 :   Dist = 0;
    3827             : 
    3828        1294 :   MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
    3829             :   assert(II->isInsideBundle() && "Empty bundle?");
    3830        1294 :   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    3831             : 
    3832             :   // FIXME: This doesn't properly handle multiple uses.
    3833             :   int Idx = -1;
    3834        2608 :   while (II != E && II->isInsideBundle()) {
    3835        2607 :     Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
    3836        2607 :     if (Idx != -1)
    3837             :       break;
    3838        2628 :     if (II->getOpcode() != ARM::t2IT)
    3839          20 :       ++Dist;
    3840             :     ++II;
    3841             :   }
    3842             : 
    3843        1294 :   if (Idx == -1) {
    3844           1 :     Dist = 0;
    3845           1 :     return nullptr;
    3846             :   }
    3847             : 
    3848        1293 :   UseIdx = Idx;
    3849        1293 :   return &*II;
    3850             : }
    3851             : 
    3852             : /// Return the number of cycles to add to (or subtract from) the static
    3853             : /// itinerary based on the def opcode and alignment. The caller will ensure that
    3854             : /// adjusted latency is at least one cycle.
    3855      159925 : static int adjustDefLatency(const ARMSubtarget &Subtarget,
    3856             :                             const MachineInstr &DefMI,
    3857             :                             const MCInstrDesc &DefMCID, unsigned DefAlign) {
    3858             :   int Adjust = 0;
    3859      299026 :   if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
    3860             :     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    3861             :     // variants are one cycle cheaper.
    3862       44248 :     switch (DefMCID.getOpcode()) {
    3863             :     default: break;
    3864          58 :     case ARM::LDRrs:
    3865             :     case ARM::LDRBrs: {
    3866          58 :       unsigned ShOpVal = DefMI.getOperand(3).getImm();
    3867             :       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3868          58 :       if (ShImm == 0 ||
    3869          27 :           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
    3870             :         --Adjust;
    3871             :       break;
    3872             :     }
    3873          67 :     case ARM::t2LDRs:
    3874             :     case ARM::t2LDRBs:
    3875             :     case ARM::t2LDRHs:
    3876             :     case ARM::t2LDRSHs: {
    3877             :       // Thumb2 mode: lsl only.
    3878          67 :       unsigned ShAmt = DefMI.getOperand(3).getImm();
    3879          67 :       if (ShAmt == 0 || ShAmt == 2)
    3880             :         --Adjust;
    3881             :       break;
    3882             :     }
    3883             :     }
    3884      137801 :   } else if (Subtarget.isSwift()) {
    3885             :     // FIXME: Properly handle all of the latency adjustments for address
    3886             :     // writeback.
    3887           6 :     switch (DefMCID.getOpcode()) {
    3888             :     default: break;
    3889           0 :     case ARM::LDRrs:
    3890             :     case ARM::LDRBrs: {
    3891           0 :       unsigned ShOpVal = DefMI.getOperand(3).getImm();
    3892             :       bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3893             :       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3894           0 :       if (!isSub &&
    3895           0 :           (ShImm == 0 ||
    3896           0 :            ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3897             :             ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3898             :         Adjust -= 2;
    3899           0 :       else if (!isSub &&
    3900           0 :                ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
    3901             :         --Adjust;
    3902             :       break;
    3903             :     }
    3904           0 :     case ARM::t2LDRs:
    3905             :     case ARM::t2LDRBs:
    3906             :     case ARM::t2LDRHs:
    3907             :     case ARM::t2LDRSHs: {
    3908             :       // Thumb2 mode: lsl only.
    3909           0 :       unsigned ShAmt = DefMI.getOperand(3).getImm();
    3910           0 :       if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
    3911             :         Adjust -= 2;
    3912             :       break;
    3913             :     }
    3914             :     }
    3915             :   }
    3916             : 
    3917      159925 :   if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
    3918        8098 :     switch (DefMCID.getOpcode()) {
    3919             :     default: break;
    3920         194 :     case ARM::VLD1q8:
    3921             :     case ARM::VLD1q16:
    3922             :     case ARM::VLD1q32:
    3923             :     case ARM::VLD1q64:
    3924             :     case ARM::VLD1q8wb_fixed:
    3925             :     case ARM::VLD1q16wb_fixed:
    3926             :     case ARM::VLD1q32wb_fixed:
    3927             :     case ARM::VLD1q64wb_fixed:
    3928             :     case ARM::VLD1q8wb_register:
    3929             :     case ARM::VLD1q16wb_register:
    3930             :     case ARM::VLD1q32wb_register:
    3931             :     case ARM::VLD1q64wb_register:
    3932             :     case ARM::VLD2d8:
    3933             :     case ARM::VLD2d16:
    3934             :     case ARM::VLD2d32:
    3935             :     case ARM::VLD2q8:
    3936             :     case ARM::VLD2q16:
    3937             :     case ARM::VLD2q32:
    3938             :     case ARM::VLD2d8wb_fixed:
    3939             :     case ARM::VLD2d16wb_fixed:
    3940             :     case ARM::VLD2d32wb_fixed:
    3941             :     case ARM::VLD2q8wb_fixed:
    3942             :     case ARM::VLD2q16wb_fixed:
    3943             :     case ARM::VLD2q32wb_fixed:
    3944             :     case ARM::VLD2d8wb_register:
    3945             :     case ARM::VLD2d16wb_register:
    3946             :     case ARM::VLD2d32wb_register:
    3947             :     case ARM::VLD2q8wb_register:
    3948             :     case ARM::VLD2q16wb_register:
    3949             :     case ARM::VLD2q32wb_register:
    3950             :     case ARM::VLD3d8:
    3951             :     case ARM::VLD3d16:
    3952             :     case ARM::VLD3d32:
    3953             :     case ARM::VLD1d64T:
    3954             :     case ARM::VLD3d8_UPD:
    3955             :     case ARM::VLD3d16_UPD:
    3956             :     case ARM::VLD3d32_UPD:
    3957             :     case ARM::VLD1d64Twb_fixed:
    3958             :     case ARM::VLD1d64Twb_register:
    3959             :     case ARM::VLD3q8_UPD:
    3960             :     case ARM::VLD3q16_UPD:
    3961             :     case ARM::VLD3q32_UPD:
    3962             :     case ARM::VLD4d8:
    3963             :     case ARM::VLD4d16:
    3964             :     case ARM::VLD4d32:
    3965             :     case ARM::VLD1d64Q:
    3966             :     case ARM::VLD4d8_UPD:
    3967             :     case ARM::VLD4d16_UPD:
    3968             :     case ARM::VLD4d32_UPD:
    3969             :     case ARM::VLD1d64Qwb_fixed:
    3970             :     case ARM::VLD1d64Qwb_register:
    3971             :     case ARM::VLD4q8_UPD:
    3972             :     case ARM::VLD4q16_UPD:
    3973             :     case ARM::VLD4q32_UPD:
    3974             :     case ARM::VLD1DUPq8:
    3975             :     case ARM::VLD1DUPq16:
    3976             :     case ARM::VLD1DUPq32:
    3977             :     case ARM::VLD1DUPq8wb_fixed:
    3978             :     case ARM::VLD1DUPq16wb_fixed:
    3979             :     case ARM::VLD1DUPq32wb_fixed:
    3980             :     case ARM::VLD1DUPq8wb_register:
    3981             :     case ARM::VLD1DUPq16wb_register:
    3982             :     case ARM::VLD1DUPq32wb_register:
    3983             :     case ARM::VLD2DUPd8:
    3984             :     case ARM::VLD2DUPd16:
    3985             :     case ARM::VLD2DUPd32:
    3986             :     case ARM::VLD2DUPd8wb_fixed:
    3987             :     case ARM::VLD2DUPd16wb_fixed:
    3988             :     case ARM::VLD2DUPd32wb_fixed:
    3989             :     case ARM::VLD2DUPd8wb_register:
    3990             :     case ARM::VLD2DUPd16wb_register:
    3991             :     case ARM::VLD2DUPd32wb_register:
    3992             :     case ARM::VLD4DUPd8:
    3993             :     case ARM::VLD4DUPd16:
    3994             :     case ARM::VLD4DUPd32:
    3995             :     case ARM::VLD4DUPd8_UPD:
    3996             :     case ARM::VLD4DUPd16_UPD:
    3997             :     case ARM::VLD4DUPd32_UPD:
    3998             :     case ARM::VLD1LNd8:
    3999             :     case ARM::VLD1LNd16:
    4000             :     case ARM::VLD1LNd32:
    4001             :     case ARM::VLD1LNd8_UPD:
    4002             :     case ARM::VLD1LNd16_UPD:
    4003             :     case ARM::VLD1LNd32_UPD:
    4004             :     case ARM::VLD2LNd8:
    4005             :     case ARM::VLD2LNd16:
    4006             :     case ARM::VLD2LNd32:
    4007             :     case ARM::VLD2LNq16:
    4008             :     case ARM::VLD2LNq32:
    4009             :     case ARM::VLD2LNd8_UPD:
    4010             :     case ARM::VLD2LNd16_UPD:
    4011             :     case ARM::VLD2LNd32_UPD:
    4012             :     case ARM::VLD2LNq16_UPD:
    4013             :     case ARM::VLD2LNq32_UPD:
    4014             :     case ARM::VLD4LNd8:
    4015             :     case ARM::VLD4LNd16:
    4016             :     case ARM::VLD4LNd32:
    4017             :     case ARM::VLD4LNq16:
    4018             :     case ARM::VLD4LNq32:
    4019             :     case ARM::VLD4LNd8_UPD:
    4020             :     case ARM::VLD4LNd16_UPD:
    4021             :     case ARM::VLD4LNd32_UPD:
    4022             :     case ARM::VLD4LNq16_UPD:
    4023             :     case ARM::VLD4LNq32_UPD:
    4024             :       // If the address is not 64-bit aligned, the latencies of these
    4025             :       // instructions increases by one.
    4026         194 :       ++Adjust;
    4027         194 :       break;
    4028             :     }
    4029             :   }
    4030      159925 :   return Adjust;
    4031             : }
    4032             : 
    4033       58077 : int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    4034             :                                         const MachineInstr &DefMI,
    4035             :                                         unsigned DefIdx,
    4036             :                                         const MachineInstr &UseMI,
    4037             :                                         unsigned UseIdx) const {
    4038             :   // No operand latency. The caller may fall back to getInstrLatency.
    4039       58077 :   if (!ItinData || ItinData->isEmpty())
    4040             :     return -1;
    4041             : 
    4042       58077 :   const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
    4043       58077 :   unsigned Reg = DefMO.getReg();
    4044             : 
    4045             :   const MachineInstr *ResolvedDefMI = &DefMI;
    4046       58077 :   unsigned DefAdj = 0;
    4047       58077 :   if (DefMI.isBundle())
    4048             :     ResolvedDefMI =
    4049         425 :         getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
    4050       58038 :   if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
    4051       58020 :       ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
    4052             :     return 1;
    4053             :   }
    4054             : 
    4055             :   const MachineInstr *ResolvedUseMI = &UseMI;
    4056       58018 :   unsigned UseAdj = 0;
    4057       58018 :   if (UseMI.isBundle()) {
    4058             :     ResolvedUseMI =
    4059        1294 :         getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
    4060        1294 :     if (!ResolvedUseMI)
    4061             :       return -1;
    4062             :   }
    4063             : 
    4064       58017 :   return getOperandLatencyImpl(
    4065             :       ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
    4066       58017 :       Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
    4067             : }
    4068             : 
    4069       58017 : int ARMBaseInstrInfo::getOperandLatencyImpl(
    4070             :     const InstrItineraryData *ItinData, const MachineInstr &DefMI,
    4071             :     unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
    4072             :     const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
    4073             :     unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
    4074       58017 :   if (Reg == ARM::CPSR) {
    4075        4166 :     if (DefMI.getOpcode() == ARM::FMSTAT) {
    4076             :       // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
    4077         457 :       return Subtarget.isLikeA9() ? 1 : 20;
    4078             :     }
    4079             : 
    4080             :     // CPSR set and branch can be paired in the same cycle.
    4081        1626 :     if (UseMI.isBranch())
    4082             :       return 0;
    4083             : 
    4084             :     // Otherwise it takes the instruction latency (generally one).
    4085        1626 :     unsigned Latency = getInstrLatency(ItinData, DefMI);
    4086             : 
    4087             :     // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
    4088             :     // its uses. Instructions which are otherwise scheduled between them may
    4089             :     // incur a code size penalty (not able to use the CPSR setting 16-bit
    4090             :     // instructions).
    4091        1626 :     if (Latency > 0 && Subtarget.isThumb2()) {
    4092         641 :       const MachineFunction *MF = DefMI.getParent()->getParent();
    4093             :       // FIXME: Use Function::optForSize().
    4094         641 :       if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
    4095          28 :         --Latency;
    4096             :     }
    4097        1626 :     return Latency;
    4098             :   }
    4099             : 
    4100       55934 :   if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
    4101             :     return -1;
    4102             : 
    4103             :   unsigned DefAlign = DefMI.hasOneMemOperand()
    4104       10635 :                           ? (*DefMI.memoperands_begin())->getAlignment()
    4105       51794 :                           : 0;
    4106             :   unsigned UseAlign = UseMI.hasOneMemOperand()
    4107        7766 :                           ? (*UseMI.memoperands_begin())->getAlignment()
    4108       48925 :                           : 0;
    4109             : 
    4110             :   // Get the itinerary's latency if possible, and handle variable_ops.
    4111       41159 :   int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
    4112             :                                   UseIdx, UseAlign);
    4113             :   // Unable to find operand latency. The caller may resort to getInstrLatency.
    4114       41159 :   if (Latency < 0)
    4115             :     return Latency;
    4116             : 
    4117             :   // Adjust for IT block position.
    4118       34299 :   int Adj = DefAdj + UseAdj;
    4119             : 
    4120             :   // Adjust for dynamic def-side opcode variants not captured by the itinerary.
    4121       34299 :   Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
    4122       34299 :   if (Adj >= 0 || (int)Latency > -Adj) {
    4123       34299 :     return Latency + Adj;
    4124             :   }
    4125             :   // Return the itinerary latency, which may be zero but not less than zero.
    4126             :   return Latency;
    4127             : }
    4128             : 
    4129             : int
    4130       73716 : ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    4131             :                                     SDNode *DefNode, unsigned DefIdx,
    4132             :                                     SDNode *UseNode, unsigned UseIdx) const {
    4133       73716 :   if (!DefNode->isMachineOpcode())
    4134             :     return 1;
    4135             : 
    4136       51474 :   const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
    4137             : 
    4138       51474 :   if (isZeroCost(DefMCID.Opcode))
    4139             :     return 0;
    4140             : 
    4141       44231 :   if (!ItinData || ItinData->isEmpty())
    4142        4074 :     return DefMCID.mayLoad() ? 3 : 1;
    4143             : 
    4144       42194 :   if (!UseNode->isMachineOpcode()) {
    4145       15146 :     int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
    4146       15146 :     int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
    4147       15146 :     int Threshold = 1 + Adj;
    4148       15146 :     return Latency <= Threshold ? 1 : Latency - Adj;
    4149             :   }
    4150             : 
    4151             :   const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
    4152             :   const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
    4153       27048 :   unsigned DefAlign = !DefMN->memoperands_empty()
    4154        8129 :     ? (*DefMN->memoperands_begin())->getAlignment() : 0;
    4155             :   const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
    4156       27048 :   unsigned UseAlign = !UseMN->memoperands_empty()
    4157        5980 :     ? (*UseMN->memoperands_begin())->getAlignment() : 0;
    4158       27048 :   int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
    4159             :                                   UseMCID, UseIdx, UseAlign);
    4160             : 
    4161       27048 :   if (Latency > 1 &&
    4162       23567 :       (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
    4163             :        Subtarget.isCortexA7())) {
    4164             :     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    4165             :     // variants are one cycle cheaper.
    4166        3472 :     switch (DefMCID.getOpcode()) {
    4167             :     default: break;
    4168           9 :     case ARM::LDRrs:
    4169             :     case ARM::LDRBrs: {
    4170             :       unsigned ShOpVal =
    4171          27 :         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
    4172             :       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    4173           9 :       if (ShImm == 0 ||
    4174           7 :           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
    4175           9 :         --Latency;
    4176             :       break;
    4177             :     }
    4178          18 :     case ARM::t2LDRs:
    4179             :     case ARM::t2LDRBs:
    4180             :     case ARM::t2LDRHs:
    4181             :     case ARM::t2LDRSHs: {
    4182             :       // Thumb2 mode: lsl only.
    4183             :       unsigned ShAmt =
    4184          36 :         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
    4185          18 :       if (ShAmt == 0 || ShAmt == 2)
    4186          17 :         --Latency;
    4187             :       break;
    4188             :     }
    4189             :     }
    4190       25312 :   } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
    4191             :     // FIXME: Properly handle all of the latency adjustments for address
    4192             :     // writeback.
    4193           0 :     switch (DefMCID.getOpcode()) {
    4194             :     default: break;
    4195           0 :     case ARM::LDRrs:
    4196             :     case ARM::LDRBrs: {
    4197             :       unsigned ShOpVal =
    4198           0 :         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
    4199             :       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    4200           0 :       if (ShImm == 0 ||
    4201           0 :           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    4202             :            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
    4203           0 :         Latency -= 2;
    4204           0 :       else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
    4205           0 :         --Latency;
    4206             :       break;
    4207             :     }
    4208           0 :     case ARM::t2LDRs:
    4209             :     case ARM::t2LDRBs:
    4210             :     case ARM::t2LDRHs:
    4211             :     case ARM::t2LDRSHs:
    4212             :       // Thumb2 mode: lsl 0-3 only.
    4213           0 :       Latency -= 2;
    4214           0 :       break;
    4215             :     }
    4216             :   }
    4217             : 
    4218       27048 :   if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
    4219        1858 :     switch (DefMCID.getOpcode()) {
    4220             :     default: break;
    4221          19 :     case ARM::VLD1q8:
    4222             :     case ARM::VLD1q16:
    4223             :     case ARM::VLD1q32:
    4224             :     case ARM::VLD1q64:
    4225             :     case ARM::VLD1q8wb_register:
    4226             :     case ARM::VLD1q16wb_register:
    4227             :     case ARM::VLD1q32wb_register:
    4228             :     case ARM::VLD1q64wb_register:
    4229             :     case ARM::VLD1q8wb_fixed:
    4230             :     case ARM::VLD1q16wb_fixed:
    4231             :     case ARM::VLD1q32wb_fixed:
    4232             :     case ARM::VLD1q64wb_fixed:
    4233             :     case ARM::VLD2d8:
    4234             :     case ARM::VLD2d16:
    4235             :     case ARM::VLD2d32:
    4236             :     case ARM::VLD2q8Pseudo:
    4237             :     case ARM::VLD2q16Pseudo:
    4238             :     case ARM::VLD2q32Pseudo:
    4239             :     case ARM::VLD2d8wb_fixed:
    4240             :     case ARM::VLD2d16wb_fixed:
    4241             :     case ARM::VLD2d32wb_fixed:
    4242             :     case ARM::VLD2q8PseudoWB_fixed:
    4243             :     case ARM::VLD2q16PseudoWB_fixed:
    4244             :     case ARM::VLD2q32PseudoWB_fixed:
    4245             :     case ARM::VLD2d8wb_register:
    4246             :     case ARM::VLD2d16wb_register:
    4247             :     case ARM::VLD2d32wb_register:
    4248             :     case ARM::VLD2q8PseudoWB_register:
    4249             :     case ARM::VLD2q16PseudoWB_register:
    4250             :     case ARM::VLD2q32PseudoWB_register:
    4251             :     case ARM::VLD3d8Pseudo:
    4252             :     case ARM::VLD3d16Pseudo:
    4253             :     case ARM::VLD3d32Pseudo:
    4254             :     case ARM::VLD1d8TPseudo:
    4255             :     case ARM::VLD1d16TPseudo:
    4256             :     case ARM::VLD1d32TPseudo:
    4257             :     case ARM::VLD1d64TPseudo:
    4258             :     case ARM::VLD1d64TPseudoWB_fixed:
    4259             :     case ARM::VLD1d64TPseudoWB_register:
    4260             :     case ARM::VLD3d8Pseudo_UPD:
    4261             :     case ARM::VLD3d16Pseudo_UPD:
    4262             :     case ARM::VLD3d32Pseudo_UPD:
    4263             :     case ARM::VLD3q8Pseudo_UPD:
    4264             :     case ARM::VLD3q16Pseudo_UPD:
    4265             :     case ARM::VLD3q32Pseudo_UPD:
    4266             :     case ARM::VLD3q8oddPseudo:
    4267             :     case ARM::VLD3q16oddPseudo:
    4268             :     case ARM::VLD3q32oddPseudo:
    4269             :     case ARM::VLD3q8oddPseudo_UPD:
    4270             :     case ARM::VLD3q16oddPseudo_UPD:
    4271             :     case ARM::VLD3q32oddPseudo_UPD:
    4272             :     case ARM::VLD4d8Pseudo:
    4273             :     case ARM::VLD4d16Pseudo:
    4274             :     case ARM::VLD4d32Pseudo:
    4275             :     case ARM::VLD1d8QPseudo:
    4276             :     case ARM::VLD1d16QPseudo:
    4277             :     case ARM::VLD1d32QPseudo:
    4278             :     case ARM::VLD1d64QPseudo:
    4279             :     case ARM::VLD1d64QPseudoWB_fixed:
    4280             :     case ARM::VLD1d64QPseudoWB_register:
    4281             :     case ARM::VLD1q8HighQPseudo:
    4282             :     case ARM::VLD1q8LowQPseudo_UPD:
    4283             :     case ARM::VLD1q8HighTPseudo:
    4284             :     case ARM::VLD1q8LowTPseudo_UPD:
    4285             :     case ARM::VLD1q16HighQPseudo:
    4286             :     case ARM::VLD1q16LowQPseudo_UPD:
    4287             :     case ARM::VLD1q16HighTPseudo:
    4288             :     case ARM::VLD1q16LowTPseudo_UPD:
    4289             :     case ARM::VLD1q32HighQPseudo:
    4290             :     case ARM::VLD1q32LowQPseudo_UPD:
    4291             :     case ARM::VLD1q32HighTPseudo:
    4292             :     case ARM::VLD1q32LowTPseudo_UPD:
    4293             :     case ARM::VLD1q64HighQPseudo:
    4294             :     case ARM::VLD1q64LowQPseudo_UPD:
    4295             :     case ARM::VLD1q64HighTPseudo:
    4296             :     case ARM::VLD1q64LowTPseudo_UPD:
    4297             :     case ARM::VLD4d8Pseudo_UPD:
    4298             :     case ARM::VLD4d16Pseudo_UPD:
    4299             :     case ARM::VLD4d32Pseudo_UPD:
    4300             :     case ARM::VLD4q8Pseudo_UPD:
    4301             :     case ARM::VLD4q16Pseudo_UPD:
    4302             :     case ARM::VLD4q32Pseudo_UPD:
    4303             :     case ARM::VLD4q8oddPseudo:
    4304             :     case ARM::VLD4q16oddPseudo:
    4305             :     case ARM::VLD4q32oddPseudo:
    4306             :     case ARM::VLD4q8oddPseudo_UPD:
    4307             :     case ARM::VLD4q16oddPseudo_UPD:
    4308             :     case ARM::VLD4q32oddPseudo_UPD:
    4309             :     case ARM::VLD1DUPq8:
    4310             :     case ARM::VLD1DUPq16:
    4311             :     case ARM::VLD1DUPq32:
    4312             :     case ARM::VLD1DUPq8wb_fixed:
    4313             :     case ARM::VLD1DUPq16wb_fixed:
    4314             :     case ARM::VLD1DUPq32wb_fixed:
    4315             :     case ARM::VLD1DUPq8wb_register:
    4316             :     case ARM::VLD1DUPq16wb_register:
    4317             :     case ARM::VLD1DUPq32wb_register:
    4318             :     case ARM::VLD2DUPd8:
    4319             :     case ARM::VLD2DUPd16:
    4320             :     case ARM::VLD2DUPd32:
    4321             :     case ARM::VLD2DUPd8wb_fixed:
    4322             :     case ARM::VLD2DUPd16wb_fixed:
    4323             :     case ARM::VLD2DUPd32wb_fixed:
    4324             :     case ARM::VLD2DUPd8wb_register:
    4325             :     case ARM::VLD2DUPd16wb_register:
    4326             :     case ARM::VLD2DUPd32wb_register:
    4327             :     case ARM::VLD2DUPq8EvenPseudo:
    4328             :     case ARM::VLD2DUPq8OddPseudo:
    4329             :     case ARM::VLD2DUPq16EvenPseudo:
    4330             :     case ARM::VLD2DUPq16OddPseudo:
    4331             :     case ARM::VLD2DUPq32EvenPseudo:
    4332             :     case ARM::VLD2DUPq32OddPseudo:
    4333             :     case ARM::VLD3DUPq8EvenPseudo:
    4334             :     case ARM::VLD3DUPq8OddPseudo:
    4335             :     case ARM::VLD3DUPq16EvenPseudo:
    4336             :     case ARM::VLD3DUPq16OddPseudo:
    4337             :     case ARM::VLD3DUPq32EvenPseudo:
    4338             :     case ARM::VLD3DUPq32OddPseudo:
    4339             :     case ARM::VLD4DUPd8Pseudo:
    4340             :     case ARM::VLD4DUPd16Pseudo:
    4341             :     case ARM::VLD4DUPd32Pseudo:
    4342             :     case ARM::VLD4DUPd8Pseudo_UPD:
    4343             :     case ARM::VLD4DUPd16Pseudo_UPD:
    4344             :     case ARM::VLD4DUPd32Pseudo_UPD:
    4345             :     case ARM::VLD4DUPq8EvenPseudo:
    4346             :     case ARM::VLD4DUPq8OddPseudo:
    4347             :     case ARM::VLD4DUPq16EvenPseudo:
    4348             :     case ARM::VLD4DUPq16OddPseudo:
    4349             :     case ARM::VLD4DUPq32EvenPseudo:
    4350             :     case ARM::VLD4DUPq32OddPseudo:
    4351             :     case ARM::VLD1LNq8Pseudo:
    4352             :     case ARM::VLD1LNq16Pseudo:
    4353             :     case ARM::VLD1LNq32Pseudo:
    4354             :     case ARM::VLD1LNq8Pseudo_UPD:
    4355             :     case ARM::VLD1LNq16Pseudo_UPD:
    4356             :     case ARM::VLD1LNq32Pseudo_UPD:
    4357             :     case ARM::VLD2LNd8Pseudo:
    4358             :     case ARM::VLD2LNd16Pseudo:
    4359             :     case ARM::VLD2LNd32Pseudo:
    4360             :     case ARM::VLD2LNq16Pseudo:
    4361             :     case ARM::VLD2LNq32Pseudo:
    4362             :     case ARM::VLD2LNd8Pseudo_UPD:
    4363             :     case ARM::VLD2LNd16Pseudo_UPD:
    4364             :     case ARM::VLD2LNd32Pseudo_UPD:
    4365             :     case ARM::VLD2LNq16Pseudo_UPD:
    4366             :     case ARM::VLD2LNq32Pseudo_UPD:
    4367             :     case ARM::VLD4LNd8Pseudo:
    4368             :     case ARM::VLD4LNd16Pseudo:
    4369             :     case ARM::VLD4LNd32Pseudo:
    4370             :     case ARM::VLD4LNq16Pseudo:
    4371             :     case ARM::VLD4LNq32Pseudo:
    4372             :     case ARM::VLD4LNd8Pseudo_UPD:
    4373             :     case ARM::VLD4LNd16Pseudo_UPD:
    4374             :     case ARM::VLD4LNd32Pseudo_UPD:
    4375             :     case ARM::VLD4LNq16Pseudo_UPD:
    4376             :     case ARM::VLD4LNq32Pseudo_UPD:
    4377             :       // If the address is not 64-bit aligned, the latencies of these
    4378             :       // instructions increases by one.
    4379          19 :       ++Latency;
    4380          19 :       break;
    4381             :     }
    4382             : 
    4383             :   return Latency;
    4384             : }
    4385             : 
    4386       42555 : unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
    4387       42555 :   if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
    4388             :       MI.isImplicitDef())
    4389             :     return 0;
    4390             : 
    4391       42540 :   if (MI.isBundle())
    4392             :     return 0;
    4393             : 
    4394             :   const MCInstrDesc &MCID = MI.getDesc();
    4395             : 
    4396       85006 :   if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
    4397        2318 :                         !Subtarget.cheapPredicableCPSRDef())) {
    4398             :     // When predicated, CPSR is an additional source operand for CPSR updating
    4399             :     // instructions, this apparently increases their latencies.
    4400        3455 :     return 1;
    4401             :   }
    4402             :   return 0;
    4403             : }
    4404             : 
    4405      131779 : unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
    4406             :                                            const MachineInstr &MI,
    4407             :                                            unsigned *PredCost) const {
    4408      131752 :   if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
    4409             :       MI.isImplicitDef())
    4410             :     return 1;
    4411             : 
    4412             :   // An instruction scheduler typically runs on unbundled instructions, however
    4413             :   // other passes may query the latency of a bundled instruction.
    4414      131697 :   if (MI.isBundle()) {
    4415             :     unsigned Latency = 0;
    4416        1614 :     MachineBasicBlock::const_instr_iterator I = MI.getIterator();
    4417        1614 :     MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    4418        5030 :     while (++I != E && I->isInsideBundle()) {
    4419        6832 :       if (I->getOpcode() != ARM::t2IT)
    4420        1963 :         Latency += getInstrLatency(ItinData, *I, PredCost);
    4421             :     }
    4422             :     return Latency;
    4423             :   }
    4424             : 
    4425             :   const MCInstrDesc &MCID = MI.getDesc();
    4426      130083 :   if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
    4427           0 :                                      !Subtarget.cheapPredicableCPSRDef()))) {
    4428             :     // When predicated, CPSR is an additional source operand for CPSR updating
    4429             :     // instructions, this apparently increases their latencies.
    4430           0 :     *PredCost = 1;
    4431             :   }
    4432             :   // Be sure to call getStageLatency for an empty itinerary in case it has a
    4433             :   // valid MinLatency property.
    4434      130083 :   if (!ItinData)
    4435           0 :     return MI.mayLoad() ? 3 : 1;
    4436             : 
    4437      130083 :   unsigned Class = MCID.getSchedClass();
    4438             : 
    4439             :   // For instructions with variable uops, use uops as latency.
    4440      130083 :   if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
    4441        4457 :     return getNumMicroOps(ItinData, MI);
    4442             : 
    4443             :   // For the common case, fall back on the itinerary's latency.
    4444      125626 :   unsigned Latency = ItinData->getStageLatency(Class);
    4445             : 
    4446             :   // Adjust for dynamic def-side opcode variants not captured by the itinerary.
    4447             :   unsigned DefAlign =
    4448      153588 :       MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
    4449      125626 :   int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
    4450      125626 :   if (Adj >= 0 || (int)Latency > -Adj) {
    4451      125595 :     return Latency + Adj;
    4452             :   }
    4453             :   return Latency;
    4454             : }
    4455             : 
    4456       67150 : int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
    4457             :                                       SDNode *Node) const {
    4458       67150 :   if (!Node->isMachineOpcode())
    4459             :     return 1;
    4460             : 
    4461       67150 :   if (!ItinData || ItinData->isEmpty())
    4462             :     return 1;
    4463             : 
    4464             :   unsigned Opcode = Node->getMachineOpcode();
    4465       67150 :   switch (Opcode) {
    4466       67148 :   default:
    4467      201444 :     return ItinData->getStageLatency(get(Opcode).getSchedClass());
    4468             :   case ARM::VLDMQIA:
    4469             :   case ARM::VSTMQIA:
    4470             :     return 2;
    4471             :   }
    4472             : }
    4473             : 
    4474          88 : bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
    4475             :                                              const MachineRegisterInfo *MRI,
    4476             :                                              const MachineInstr &DefMI,
    4477             :                                              unsigned DefIdx,
    4478             :                                              const MachineInstr &UseMI,
    4479             :                                              unsigned UseIdx) const {
    4480          88 :   unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
    4481          88 :   unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
    4482          88 :   if (Subtarget.nonpipelinedVFP() &&
    4483          20 :       (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
    4484             :     return true;
    4485             : 
    4486             :   // Hoist VFP / NEON instructions with 4 or higher latency.
    4487             :   unsigned Latency =
    4488          88 :       SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
    4489          88 :   if (Latency <= 3)
    4490             :     return false;
    4491          19 :   return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
    4492          19 :          UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
    4493             : }
    4494             : 
    4495         444 : bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
    4496             :                                         const MachineInstr &DefMI,
    4497             :                                         unsigned DefIdx) const {
    4498             :   const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
    4499         428 :   if (!ItinData || ItinData->isEmpty())
    4500             :     return false;
    4501             : 
    4502         428 :   unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
    4503         428 :   if (DDomain == ARMII::DomainGeneral) {
    4504         386 :     unsigned DefClass = DefMI.getDesc().getSchedClass();
    4505             :     int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    4506         386 :     return (DefCycle != -1 && DefCycle <= 2);
    4507             :   }
    4508             :   return false;
    4509             : }
    4510             : 
    4511     1314003 : bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
    4512             :                                          StringRef &ErrInfo) const {
    4513     2628006 :   if (convertAddSubFlagsOpcode(MI.getOpcode())) {
    4514           0 :     ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
    4515           0 :     return false;
    4516             :   }
    4517             :   return true;
    4518             : }
    4519             : 
    4520             : // LoadStackGuard has so far only been implemented for MachO. Different code
    4521             : // sequence is needed for other targets.
    4522          50 : void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
    4523             :                                                 unsigned LoadImmOpc,
    4524             :                                                 unsigned LoadOpc) const {
    4525             :   assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
    4526             :          "ROPI/RWPI not currently supported with stack guard");
    4527             : 
    4528          50 :   MachineBasicBlock &MBB = *MI->getParent();
    4529             :   DebugLoc DL = MI->getDebugLoc();
    4530          50 :   unsigned Reg = MI->getOperand(0).getReg();
    4531             :   const GlobalValue *GV =
    4532          50 :       cast<GlobalValue>((*MI->memoperands_begin())->getValue());
    4533          50 :   MachineInstrBuilder MIB;
    4534             : 
    4535         100 :   BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
    4536             :       .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
    4537             : 
    4538          50 :   if (Subtarget.isGVIndirectSymbol(GV)) {
    4539          84 :     MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
    4540          42 :     MIB.addReg(Reg, RegState::Kill).addImm(0);
    4541             :     auto Flags = MachineMemOperand::MOLoad |
    4542             :                  MachineMemOperand::MODereferenceable |
    4543             :                  MachineMemOperand::MOInvariant;
    4544          84 :     MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
    4545             :         MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
    4546          42 :     MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
    4547             :   }
    4548             : 
    4549         100 :   MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
    4550          50 :   MIB.addReg(Reg, RegState::Kill)
    4551             :       .addImm(0)
    4552             :       .cloneMemRefs(*MI)
    4553          50 :       .add(predOps(ARMCC::AL));
    4554          50 : }
    4555             : 
    4556             : bool
    4557         578 : ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
    4558             :                                      unsigned &AddSubOpc,
    4559             :                                      bool &NegAcc, bool &HasLane) const {
    4560         578 :   DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
    4561         578 :   if (I == MLxEntryMap.end())
    4562             :     return false;
    4563             : 
    4564          16 :   const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
    4565          16 :   MulOpc = Entry.MulOpc;
    4566          16 :   AddSubOpc = Entry.AddSubOpc;
    4567          16 :   NegAcc = Entry.NegAcc;
    4568          16 :   HasLane = Entry.HasLane;
    4569          16 :   return true;
    4570             : }
    4571             : 
    4572             : //===----------------------------------------------------------------------===//
    4573             : // Execution domains.
    4574             : //===----------------------------------------------------------------------===//
    4575             : //
    4576             : // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
    4577             : // and some can go down both.  The vmov instructions go down the VFP pipeline,
    4578             : // but they can be changed to vorr equivalents that are executed by the NEON
    4579             : // pipeline.
    4580             : //
    4581             : // We use the following execution domain numbering:
    4582             : //
    4583             : enum ARMExeDomain {
    4584             :   ExeGeneric = 0,
    4585             :   ExeVFP = 1,
    4586             :   ExeNEON = 2
    4587             : };
    4588             : 
    4589             : //
    4590             : // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
    4591             : //
    4592             : std::pair<uint16_t, uint16_t>
    4593       98592 : ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
    4594             :   // If we don't have access to NEON instructions then we won't be able
    4595             :   // to swizzle anything to the NEON domain. Check to make sure.
    4596       98592 :   if (Subtarget.hasNEON()) {
    4597             :     // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
    4598             :     // if they are not predicated.
    4599      116270 :     if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
    4600         305 :       return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
    4601             : 
    4602             :     // CortexA9 is particularly picky about mixing the two and wants these
    4603             :     // converted.
    4604       57830 :     if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
    4605        1494 :         (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
    4606             :          MI.getOpcode() == ARM::VMOVS))
    4607         153 :       return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
    4608             :   }
    4609             :   // No other instructions can be swizzled, so just determine their domain.
    4610       98134 :   unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
    4611             : 
    4612       98134 :   if (Domain & ARMII::DomainNEON)
    4613       17971 :     return std::make_pair(ExeNEON, 0);
    4614             : 
    4615             :   // Certain instructions can go either way on Cortex-A8.
    4616             :   // Treat them as NEON instructions.
    4617       80163 :   if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
    4618         128 :     return std::make_pair(ExeNEON, 0);
    4619             : 
    4620       80035 :   if (Domain & ARMII::DomainVFP)
    4621        5126 :     return std::make_pair(ExeVFP, 0);
    4622             : 
    4623       74909 :   return std::make_pair(ExeGeneric, 0);
    4624             : }
    4625             : 
    4626          69 : static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
    4627             :                                             unsigned SReg, unsigned &Lane) {
    4628          69 :   unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
    4629          69 :   Lane = 0;
    4630             : 
    4631          69 :   if (DReg != ARM::NoRegister)
    4632             :    return DReg;
    4633             : 
    4634          24 :   Lane = 1;
    4635          24 :   DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
    4636             : 
    4637             :   assert(DReg && "S-register with no D super-register?");
    4638          24 :   return DReg;
    4639             : }
    4640             : 
    4641             : /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
    4642             : /// set ImplicitSReg to a register number that must be marked as implicit-use or
    4643             : /// zero if no register needs to be defined as implicit-use.
    4644             : ///
    4645             : /// If the function cannot determine if an SPR should be marked implicit use or
    4646             : /// not, it returns false.
    4647             : ///
    4648             : /// This function handles cases where an instruction is being modified from taking
    4649             : /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
    4650             : /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
    4651             : /// lane of the DPR).
    4652             : ///
    4653             : /// If the other SPR is defined, an implicit-use of it should be added. Else,
    4654             : /// (including the case where the DPR itself is defined), it should not.
    4655             : ///
    4656          21 : static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
    4657             :                                        MachineInstr &MI, unsigned DReg,
    4658             :                                        unsigned Lane, unsigned &ImplicitSReg) {
    4659             :   // If the DPR is defined or used already, the other SPR lane will be chained
    4660             :   // correctly, so there is nothing to be done.
    4661          31 :   if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
    4662          13 :     ImplicitSReg = 0;
    4663          13 :     return true;
    4664             :   }
    4665             : 
    4666             :   // Otherwise we need to go searching to see if the SPR is set explicitly.
    4667           8 :   ImplicitSReg = TRI->getSubReg(DReg,
    4668           8 :                                 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
    4669             :   MachineBasicBlock::LivenessQueryResult LQR =
    4670          16 :       MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
    4671             : 
    4672           8 :   if (LQR == MachineBasicBlock::LQR_Live)
    4673             :     return true;
    4674           8 :   else if (LQR == MachineBasicBlock::LQR_Unknown)
    4675             :     return false;
    4676             : 
    4677             :   // If the register is known not to be live, there is no need to add an
    4678             :   // implicit-use.
    4679           8 :   ImplicitSReg = 0;
    4680           8 :   return true;
    4681             : }
    4682             : 
    4683         458 : void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
    4684             :                                           unsigned Domain) const {
    4685             :   unsigned DstReg, SrcReg, DReg;
    4686             :   unsigned Lane;
    4687         458 :   MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    4688         458 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    4689         916 :   switch (MI.getOpcode()) {
    4690           0 :   default:
    4691           0 :     llvm_unreachable("cannot handle opcode!");
    4692             :     break;
    4693         305 :   case ARM::VMOVD:
    4694         305 :     if (Domain != ExeNEON)
    4695             :       break;
    4696             : 
    4697             :     // Zap the predicate operands.
    4698             :     assert(!isPredicated(MI) && "Cannot predicate a VORRd");
    4699             : 
    4700             :     // Make sure we've got NEON instructions.
    4701             :     assert(Subtarget.hasNEON() && "VORRd requires NEON");
    4702             : 
    4703             :     // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
    4704         180 :     DstReg = MI.getOperand(0).getReg();
    4705         180 :     SrcReg = MI.getOperand(1).getReg();
    4706             : 
    4707         900 :     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
    4708         900 :       MI.RemoveOperand(i - 1);
    4709             : 
    4710             :     // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
    4711         180 :     MI.setDesc(get(ARM::VORRd));
    4712         180 :     MIB.addReg(DstReg, RegState::Define)
    4713         180 :         .addReg(SrcReg)
    4714         180 :         .addReg(SrcReg)
    4715         180 :         .add(predOps(ARMCC::AL));
    4716         180 :     break;
    4717          59 :   case ARM::VMOVRS:
    4718          59 :     if (Domain != ExeNEON)
    4719             :       break;
    4720             :     assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
    4721             : 
    4722             :     // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
    4723          37 :     DstReg = MI.getOperand(0).getReg();
    4724          37 :     SrcReg = MI.getOperand(1).getReg();
    4725             : 
    4726         185 :     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
    4727         185 :       MI.RemoveOperand(i - 1);
    4728             : 
    4729          37 :     DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
    4730             : 
    4731             :     // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
    4732             :     // Note that DSrc has been widened and the other lane may be undef, which
    4733             :     // contaminates the entire register.
    4734          37 :     MI.setDesc(get(ARM::VGETLNi32));
    4735          37 :     MIB.addReg(DstReg, RegState::Define)
    4736          37 :         .addReg(DReg, RegState::Undef)
    4737          37 :         .addImm(Lane)
    4738          37 :         .add(predOps(ARMCC::AL));
    4739             : 
    4740             :     // The old source should be an implicit use, otherwise we might think it
    4741             :     // was dead before here.
    4742          37 :     MIB.addReg(SrcReg, RegState::Implicit);
    4743          37 :     break;
    4744          80 :   case ARM::VMOVSR: {
    4745          80 :     if (Domain != ExeNEON)
    4746             :       break;
    4747             :     assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
    4748             : 
    4749             :     // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
    4750          10 :     DstReg = MI.getOperand(0).getReg();
    4751          10 :     SrcReg = MI.getOperand(1).getReg();
    4752             : 
    4753          10 :     DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
    4754             : 
    4755             :     unsigned ImplicitSReg;
    4756          10 :     if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
    4757             :       break;
    4758             : 
    4759          50 :     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
    4760          40 :       MI.RemoveOperand(i - 1);
    4761             : 
    4762             :     // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
    4763             :     // Again DDst may be undefined at the beginning of this instruction.
    4764          10 :     MI.setDesc(get(ARM::VSETLNi32));
    4765          10 :     MIB.addReg(DReg, RegState::Define)
    4766          10 :         .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
    4767          10 :         .addReg(SrcReg)
    4768          10 :         .addImm(Lane)
    4769          10 :         .add(predOps(ARMCC::AL));
    4770             : 
    4771             :     // The narrower destination must be marked as set to keep previous chains
    4772             :     // in place.
    4773          10 :     MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
    4774          10 :     if (ImplicitSReg != 0)
    4775           0 :       MIB.addReg(ImplicitSReg, RegState::Implicit);
    4776             :     break;
    4777             :     }
    4778          14 :     case ARM::VMOVS: {
    4779          14 :       if (Domain != ExeNEON)
    4780             :         break;
    4781             : 
    4782             :       // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
    4783          11 :       DstReg = MI.getOperand(0).getReg();
    4784          11 :       SrcReg = MI.getOperand(1).getReg();
    4785             : 
    4786          11 :       unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
    4787          11 :       DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
    4788          11 :       DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
    4789             : 
    4790             :       unsigned ImplicitSReg;
    4791          11 :       if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
    4792             :         break;
    4793             : 
    4794          55 :       for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
    4795          44 :         MI.RemoveOperand(i - 1);
    4796             : 
    4797          11 :       if (DSrc == DDst) {
    4798             :         // Destination can be:
    4799             :         //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
    4800           1 :         MI.setDesc(get(ARM::VDUPLN32d));
    4801           1 :         MIB.addReg(DDst, RegState::Define)
    4802           1 :             .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
    4803           1 :             .addImm(SrcLane)
    4804           1 :             .add(predOps(ARMCC::AL));
    4805             : 
    4806             :         // Neither the source or the destination are naturally represented any
    4807             :         // more, so add them in manually.
    4808           1 :         MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
    4809           1 :         MIB.addReg(SrcReg, RegState::Implicit);
    4810           1 :         if (ImplicitSReg != 0)
    4811           0 :           MIB.addReg(ImplicitSReg, RegState::Implicit);
    4812             :         break;
    4813             :       }
    4814             : 
    4815             :       // In general there's no single instruction that can perform an S <-> S
    4816             :       // move in NEON space, but a pair of VEXT instructions *can* do the
    4817             :       // job. It turns out that the VEXTs needed will only use DSrc once, with
    4818             :       // the position based purely on the combination of lane-0 and lane-1
    4819             :       // involved. For example
    4820             :       //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
    4821             :       //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
    4822             :       //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
    4823             :       //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
    4824             :       //
    4825             :       // Pattern of the MachineInstrs is:
    4826             :       //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
    4827          10 :       MachineInstrBuilder NewMIB;
    4828          10 :       NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
    4829          20 :                        DDst);
    4830             : 
    4831             :       // On the first instruction, both DSrc and DDst may be undef if present.
    4832             :       // Specifically when the original instruction didn't have them as an
    4833             :       // <imp-use>.
    4834          10 :       unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
    4835             :       bool CurUndef = !MI.readsRegister(CurReg, TRI);
    4836          10 :       NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
    4837             : 
    4838          10 :       CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
    4839             :       CurUndef = !MI.readsRegister(CurReg, TRI);
    4840          10 :       NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
    4841             :             .addImm(1)
    4842          10 :             .add(predOps(ARMCC::AL));
    4843             : 
    4844          10 :       if (SrcLane == DstLane)
    4845           7 :         NewMIB.addReg(SrcReg, RegState::Implicit);
    4846             : 
    4847          10 :       MI.setDesc(get(ARM::VEXTd32));
    4848          10 :       MIB.addReg(DDst, RegState::Define);
    4849             : 
    4850             :       // On the second instruction, DDst has definitely been defined above, so
    4851             :       // it is not undef. DSrc, if present, can be undef as above.
    4852          10 :       CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
    4853          11 :       CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    4854          10 :       MIB.addReg(CurReg, getUndefRegState(CurUndef));
    4855             : 
    4856          10 :       CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
    4857          12 :       CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    4858          10 :       MIB.addReg(CurReg, getUndefRegState(CurUndef))
    4859             :          .addImm(1)
    4860          10 :          .add(predOps(ARMCC::AL));
    4861             : 
    4862          10 :       if (SrcLane != DstLane)
    4863           3 :         MIB.addReg(SrcReg, RegState::Implicit);
    4864             : 
    4865             :       // As before, the original destination is no longer represented, add it
    4866             :       // implicitly.
    4867          10 :       MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
    4868          10 :       if (ImplicitSReg != 0)
    4869           0 :         MIB.addReg(ImplicitSReg, RegState::Implicit);
    4870             :       break;
    4871             :     }
    4872             :   }
    4873         458 : }
    4874             : 
    4875             : //===----------------------------------------------------------------------===//
    4876             : // Partial register updates
    4877             : //===----------------------------------------------------------------------===//
    4878             : //
    4879             : // Swift renames NEON registers with 64-bit granularity.  That means any
    4880             : // instruction writing an S-reg implicitly reads the containing D-reg.  The
    4881             : // problem is mostly avoided by translating f32 operations to v2f32 operations
    4882             : // on D-registers, but f32 loads are still a problem.
    4883             : //
    4884             : // These instructions can load an f32 into a NEON register:
    4885             : //
    4886             : // VLDRS - Only writes S, partial D update.
    4887             : // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
    4888             : // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
    4889             : //
    4890             : // FCONSTD can be used as a dependency-breaking instruction.
    4891      102661 : unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
    4892             :     const MachineInstr &MI, unsigned OpNum,
    4893             :     const TargetRegisterInfo *TRI) const {
    4894      102661 :   auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
    4895      102661 :   if (!PartialUpdateClearance)
    4896             :     return 0;
    4897             : 
    4898             :   assert(TRI && "Need TRI instance");
    4899             : 
    4900        2894 :   const MachineOperand &MO = MI.getOperand(OpNum);
    4901             :   if (MO.readsReg())
    4902             :     return 0;
    4903        2894 :   unsigned Reg = MO.getReg();
    4904             :   int UseOp = -1;
    4905             : 
    4906        5788 :   switch (MI.getOpcode()) {
    4907             :   // Normal instructions writing only an S-register.
    4908          35 :   case ARM::VLDRS:
    4909             :   case ARM::FCONSTS:
    4910             :   case ARM::VMOVSR:
    4911             :   case ARM::VMOVv8i8:
    4912             :   case ARM::VMOVv4i16:
    4913             :   case ARM::VMOVv2i32:
    4914             :   case ARM::VMOVv2f32:
    4915             :   case ARM::VMOVv1i64:
    4916          35 :     UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
    4917             :     break;
    4918             : 
    4919             :     // Explicitly reads the dependency.
    4920             :   case ARM::VLD1LNd32:
    4921             :     UseOp = 3;
    4922             :     break;
    4923             :   default:
    4924             :     return 0;
    4925             :   }
    4926             : 
    4927             :   // If this instruction actually reads a value from Reg, there is no unwanted
    4928             :   // dependency.
    4929          39 :   if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
    4930             :     return 0;
    4931             : 
    4932             :   // We must be able to clobber the whole D-reg.
    4933          38 :   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    4934             :     // Virtual register must be a def undef foo:ssub_0 operand.
    4935           0 :     if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
    4936           0 :       return 0;
    4937          38 :   } else if (ARM::SPRRegClass.contains(Reg)) {
    4938             :     // Physical register: MI must define the full D-reg.
    4939          17 :     unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
    4940             :                                              &ARM::DPRRegClass);
    4941          34 :     if (!DReg || !MI.definesRegister(DReg, TRI))
    4942          14 :       return 0;
    4943             :   }
    4944             : 
    4945             :   // MI has an unwanted D-register dependency.
    4946             :   // Avoid defs in the previous N instructrions.
    4947             :   return PartialUpdateClearance;
    4948             : }
    4949             : 
    4950             : // Break a partial register dependency after getPartialRegUpdateClearance
    4951             : // returned non-zero.
    4952           3 : void ARMBaseInstrInfo::breakPartialRegDependency(
    4953             :     MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
    4954             :   assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
    4955             :   assert(TRI && "Need TRI instance");
    4956             : 
    4957           3 :   const MachineOperand &MO = MI.getOperand(OpNum);
    4958           3 :   unsigned Reg = MO.getReg();
    4959             :   assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
    4960             :          "Can't break virtual register dependencies.");
    4961             :   unsigned DReg = Reg;
    4962             : 
    4963             :   // If MI defines an S-reg, find the corresponding D super-register.
    4964           3 :   if (ARM::SPRRegClass.contains(Reg)) {
    4965           0 :     DReg = ARM::D0 + (Reg - ARM::S0) / 2;
    4966             :     assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
    4967             :   }
    4968             : 
    4969             :   assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
    4970             :   assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
    4971             : 
    4972             :   // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
    4973             :   // the full D-register by loading the same value to both lanes.  The
    4974             :   // instruction is micro-coded with 2 uops, so don't do this until we can
    4975             :   // properly schedule micro-coded instructions.  The dispatcher stalls cause
    4976             :   // too big regressions.
    4977             : 
    4978             :   // Insert the dependency-breaking FCONSTD before MI.
    4979             :   // 96 is the encoding of 0.5, but the actual value doesn't matter here.
    4980           6 :   BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
    4981             :       .addImm(96)
    4982           3 :       .add(predOps(ARMCC::AL));
    4983           3 :   MI.addRegisterKilled(DReg, TRI, true);
    4984           3 : }
    4985             : 
    4986          16 : bool ARMBaseInstrInfo::hasNOP() const {
    4987          32 :   return Subtarget.getFeatureBits()[ARM::HasV6KOps];
    4988             : }
    4989             : 
    4990         110 : bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
    4991         110 :   if (MI->getNumOperands() < 4)
    4992             :     return true;
    4993         110 :   unsigned ShOpVal = MI->getOperand(3).getImm();
    4994             :   unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
    4995             :   // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
    4996         110 :   if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
    4997         110 :       ((ShImm == 1 || ShImm == 2) &&
    4998             :        ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
    4999          20 :     return true;
    5000             : 
    5001             :   return false;
    5002             : }
    5003             : 
    5004        1595 : bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
    5005             :     const MachineInstr &MI, unsigned DefIdx,
    5006             :     SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
    5007             :   assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
    5008             :   assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
    5009             : 
    5010        3190 :   switch (MI.getOpcode()) {
    5011        1595 :   case ARM::VMOVDRR:
    5012             :     // dX = VMOVDRR rY, rZ
    5013             :     // is the same as:
    5014             :     // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
    5015             :     // Populate the InputRegs accordingly.
    5016             :     // rY
    5017        1595 :     const MachineOperand *MOReg = &MI.getOperand(1);
    5018        1595 :     if (!MOReg->isUndef())
    5019        4785 :       InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
    5020             :                                               MOReg->getSubReg(), ARM::ssub_0));
    5021             :     // rZ
    5022        1595 :     MOReg = &MI.getOperand(2);
    5023        1595 :     if (!MOReg->isUndef())
    5024        4785 :       InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
    5025             :                                               MOReg->getSubReg(), ARM::ssub_1));
    5026        1595 :     return true;
    5027             :   }
    5028           0 :   llvm_unreachable("Target dependent opcode missing");
    5029             : }
    5030             : 
    5031        3121 : bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
    5032             :     const MachineInstr &MI, unsigned DefIdx,
    5033             :     RegSubRegPairAndIdx &InputReg) const {
    5034             :   assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
    5035             :   assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
    5036             : 
    5037        6242 :   switch (MI.getOpcode()) {
    5038        3121 :   case ARM::VMOVRRD:
    5039             :     // rX, rY = VMOVRRD dZ
    5040             :     // is the same as:
    5041             :     // rX = EXTRACT_SUBREG dZ, ssub_0
    5042             :     // rY = EXTRACT_SUBREG dZ, ssub_1
    5043        3121 :     const MachineOperand &MOReg = MI.getOperand(2);
    5044        3121 :     if (MOReg.isUndef())
    5045             :       return false;
    5046        3121 :     InputReg.Reg = MOReg.getReg();
    5047        3121 :     InputReg.SubReg = MOReg.getSubReg();
    5048        3121 :     InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
    5049        3121 :     return true;
    5050             :   }
    5051           0 :   llvm_unreachable("Target dependent opcode missing");
    5052             : }
    5053             : 
    5054         193 : bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
    5055             :     const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
    5056             :     RegSubRegPairAndIdx &InsertedReg) const {
    5057             :   assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
    5058             :   assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
    5059             : 
    5060         386 :   switch (MI.getOpcode()) {
    5061         193 :   case ARM::VSETLNi32:
    5062             :     // dX = VSETLNi32 dY, rZ, imm
    5063         193 :     const MachineOperand &MOBaseReg = MI.getOperand(1);
    5064             :     const MachineOperand &MOInsertedReg = MI.getOperand(2);
    5065         193 :     if (MOInsertedReg.isUndef())
    5066             :       return false;
    5067             :     const MachineOperand &MOIndex = MI.getOperand(3);
    5068         193 :     BaseReg.Reg = MOBaseReg.getReg();
    5069         193 :     BaseReg.SubReg = MOBaseReg.getSubReg();
    5070             : 
    5071         193 :     InsertedReg.Reg = MOInsertedReg.getReg();
    5072         193 :     InsertedReg.SubReg = MOInsertedReg.getSubReg();
    5073         193 :     InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
    5074         193 :     return true;
    5075             :   }
    5076           0 :   llvm_unreachable("Target dependent opcode missing");
    5077             : }
    5078             : 
    5079             : std::pair<unsigned, unsigned>
    5080         258 : ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
    5081             :   const unsigned Mask = ARMII::MO_OPTION_MASK;
    5082         258 :   return std::make_pair(TF & Mask, TF & ~Mask);
    5083             : }
    5084             : 
    5085             : ArrayRef<std::pair<unsigned, const char *>>
    5086         243 : ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
    5087             :   using namespace ARMII;
    5088             : 
    5089             :   static const std::pair<unsigned, const char *> TargetFlags[] = {
    5090             :       {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
    5091         243 :   return makeArrayRef(TargetFlags);
    5092             : }
    5093             : 
    5094             : ArrayRef<std::pair<unsigned, const char *>>
    5095          16 : ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
    5096             :   using namespace ARMII;
    5097             : 
    5098             :   static const std::pair<unsigned, const char *> TargetFlags[] = {
    5099             :       {MO_COFFSTUB, "arm-coffstub"},
    5100             :       {MO_GOT, "arm-got"},
    5101             :       {MO_SBREL, "arm-sbrel"},
    5102             :       {MO_DLLIMPORT, "arm-dllimport"},
    5103             :       {MO_SECREL, "arm-secrel"},
    5104             :       {MO_NONLAZY, "arm-nonlazy"}};
    5105          16 :   return makeArrayRef(TargetFlags);
    5106             : }

Generated by: LCOV version 1.13