LCOV - code coverage report
Current view: top level - lib/Target/ARM - ARMBaseInstrInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1409 1874 75.2 %
Date: 2018-06-17 00:07:59 Functions: 89 101 88.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains the Base ARM implementation of the TargetInstrInfo class.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "ARMBaseInstrInfo.h"
      15             : #include "ARMBaseRegisterInfo.h"
      16             : #include "ARMConstantPoolValue.h"
      17             : #include "ARMFeatures.h"
      18             : #include "ARMHazardRecognizer.h"
      19             : #include "ARMMachineFunctionInfo.h"
      20             : #include "ARMSubtarget.h"
      21             : #include "MCTargetDesc/ARMAddressingModes.h"
      22             : #include "MCTargetDesc/ARMBaseInfo.h"
      23             : #include "llvm/ADT/DenseMap.h"
      24             : #include "llvm/ADT/STLExtras.h"
      25             : #include "llvm/ADT/SmallSet.h"
      26             : #include "llvm/ADT/SmallVector.h"
      27             : #include "llvm/ADT/Triple.h"
      28             : #include "llvm/CodeGen/LiveVariables.h"
      29             : #include "llvm/CodeGen/MachineBasicBlock.h"
      30             : #include "llvm/CodeGen/MachineConstantPool.h"
      31             : #include "llvm/CodeGen/MachineFrameInfo.h"
      32             : #include "llvm/CodeGen/MachineFunction.h"
      33             : #include "llvm/CodeGen/MachineInstr.h"
      34             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      35             : #include "llvm/CodeGen/MachineMemOperand.h"
      36             : #include "llvm/CodeGen/MachineOperand.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
      39             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      40             : #include "llvm/CodeGen/TargetInstrInfo.h"
      41             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      42             : #include "llvm/CodeGen/TargetSchedule.h"
      43             : #include "llvm/IR/Attributes.h"
      44             : #include "llvm/IR/Constants.h"
      45             : #include "llvm/IR/DebugLoc.h"
      46             : #include "llvm/IR/Function.h"
      47             : #include "llvm/IR/GlobalValue.h"
      48             : #include "llvm/MC/MCAsmInfo.h"
      49             : #include "llvm/MC/MCInstrDesc.h"
      50             : #include "llvm/MC/MCInstrItineraries.h"
      51             : #include "llvm/Support/BranchProbability.h"
      52             : #include "llvm/Support/Casting.h"
      53             : #include "llvm/Support/CommandLine.h"
      54             : #include "llvm/Support/Compiler.h"
      55             : #include "llvm/Support/Debug.h"
      56             : #include "llvm/Support/ErrorHandling.h"
      57             : #include "llvm/Support/raw_ostream.h"
      58             : #include "llvm/Target/TargetMachine.h"
      59             : #include <algorithm>
      60             : #include <cassert>
      61             : #include <cstdint>
      62             : #include <iterator>
      63             : #include <new>
      64             : #include <utility>
      65             : #include <vector>
      66             : 
      67             : using namespace llvm;
      68             : 
      69             : #define DEBUG_TYPE "arm-instrinfo"
      70             : 
      71             : #define GET_INSTRINFO_CTOR_DTOR
      72             : #include "ARMGenInstrInfo.inc"
      73             : 
      74             : static cl::opt<bool>
      75      101169 : EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
      76      101169 :                cl::desc("Enable ARM 2-addr to 3-addr conv"));
      77             : 
      78             : /// ARM_MLxEntry - Record information about MLA / MLS instructions.
      79             : struct ARM_MLxEntry {
      80             :   uint16_t MLxOpc;     // MLA / MLS opcode
      81             :   uint16_t MulOpc;     // Expanded multiplication opcode
      82             :   uint16_t AddSubOpc;  // Expanded add / sub opcode
      83             :   bool NegAcc;         // True if the acc is negated before the add / sub.
      84             :   bool HasLane;        // True if instruction has an extra "lane" operand.
      85             : };
      86             : 
      87             : static const ARM_MLxEntry ARM_MLxTable[] = {
      88             :   // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
      89             :   // fp scalar ops
      90             :   { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
      91             :   { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
      92             :   { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
      93             :   { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
      94             :   { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
      95             :   { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
      96             :   { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
      97             :   { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },
      98             : 
      99             :   // fp SIMD ops
     100             :   { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
     101             :   { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
     102             :   { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
     103             :   { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
     104             :   { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
     105             :   { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
     106             :   { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
     107             :   { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
     108             : };
     109             : 
     110        4804 : ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
     111             :   : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
     112        9608 :     Subtarget(STI) {
     113      158532 :   for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
     114      153728 :     if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
     115           0 :       llvm_unreachable("Duplicated entries?");
     116       76864 :     MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
     117       76864 :     MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
     118             :   }
     119        4804 : }
     120             : 
     121             : // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
     122             : // currently defaults to no prepass hazard recognizer.
     123             : ScheduleHazardRecognizer *
     124       10493 : ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
     125             :                                                const ScheduleDAG *DAG) const {
     126       10493 :   if (usePreRAHazardRecognizer()) {
     127             :     const InstrItineraryData *II =
     128       10493 :         static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
     129       10493 :     return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
     130             :   }
     131           0 :   return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
     132             : }
     133             : 
     134       10405 : ScheduleHazardRecognizer *ARMBaseInstrInfo::
     135             : CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
     136             :                                    const ScheduleDAG *DAG) const {
     137       17185 :   if (Subtarget.isThumb2() || Subtarget.hasVFP2())
     138       17552 :     return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
     139        1629 :   return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
     140             : }
     141             : 
     142           0 : MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
     143             :     MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
     144             :   // FIXME: Thumb2 support.
     145             : 
     146           0 :   if (!EnableARM3Addr)
     147             :     return nullptr;
     148             : 
     149           0 :   MachineFunction &MF = *MI.getParent()->getParent();
     150           0 :   uint64_t TSFlags = MI.getDesc().TSFlags;
     151             :   bool isPre = false;
     152           0 :   switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
     153             :   default: return nullptr;
     154           0 :   case ARMII::IndexModePre:
     155             :     isPre = true;
     156           0 :     break;
     157             :   case ARMII::IndexModePost:
     158             :     break;
     159             :   }
     160             : 
     161             :   // Try splitting an indexed load/store to an un-indexed one plus an add/sub
     162             :   // operation.
     163           0 :   unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
     164           0 :   if (MemOpc == 0)
     165             :     return nullptr;
     166             : 
     167           0 :   MachineInstr *UpdateMI = nullptr;
     168           0 :   MachineInstr *MemMI = nullptr;
     169           0 :   unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
     170           0 :   const MCInstrDesc &MCID = MI.getDesc();
     171           0 :   unsigned NumOps = MCID.getNumOperands();
     172           0 :   bool isLoad = !MI.mayStore();
     173           0 :   const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
     174           0 :   const MachineOperand &Base = MI.getOperand(2);
     175           0 :   const MachineOperand &Offset = MI.getOperand(NumOps - 3);
     176           0 :   unsigned WBReg = WB.getReg();
     177           0 :   unsigned BaseReg = Base.getReg();
     178           0 :   unsigned OffReg = Offset.getReg();
     179           0 :   unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
     180           0 :   ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
     181           0 :   switch (AddrMode) {
     182           0 :   default: llvm_unreachable("Unknown indexed op!");
     183             :   case ARMII::AddrMode2: {
     184             :     bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
     185             :     unsigned Amt = ARM_AM::getAM2Offset(OffImm);
     186           0 :     if (OffReg == 0) {
     187           0 :       if (ARM_AM::getSOImmVal(Amt) == -1)
     188             :         // Can't encode it in a so_imm operand. This transformation will
     189             :         // add more than 1 instruction. Abandon!
     190             :         return nullptr;
     191           0 :       UpdateMI = BuildMI(MF, MI.getDebugLoc(),
     192           0 :                          get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
     193           0 :                      .addReg(BaseReg)
     194           0 :                      .addImm(Amt)
     195           0 :                      .add(predOps(Pred))
     196           0 :                      .add(condCodeOp());
     197           0 :     } else if (Amt != 0) {
     198             :       ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
     199             :       unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
     200           0 :       UpdateMI = BuildMI(MF, MI.getDebugLoc(),
     201           0 :                          get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
     202           0 :                      .addReg(BaseReg)
     203           0 :                      .addReg(OffReg)
     204           0 :                      .addReg(0)
     205           0 :                      .addImm(SOOpc)
     206           0 :                      .add(predOps(Pred))
     207           0 :                      .add(condCodeOp());
     208             :     } else
     209           0 :       UpdateMI = BuildMI(MF, MI.getDebugLoc(),
     210           0 :                          get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
     211           0 :                      .addReg(BaseReg)
     212           0 :                      .addReg(OffReg)
     213           0 :                      .add(predOps(Pred))
     214           0 :                      .add(condCodeOp());
     215             :     break;
     216             :   }
     217             :   case ARMII::AddrMode3 : {
     218             :     bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
     219             :     unsigned Amt = ARM_AM::getAM3Offset(OffImm);
     220           0 :     if (OffReg == 0)
     221             :       // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
     222           0 :       UpdateMI = BuildMI(MF, MI.getDebugLoc(),
     223           0 :                          get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
     224           0 :                      .addReg(BaseReg)
     225             :                      .addImm(Amt)
     226           0 :                      .add(predOps(Pred))
     227           0 :                      .add(condCodeOp());
     228             :     else
     229           0 :       UpdateMI = BuildMI(MF, MI.getDebugLoc(),
     230           0 :                          get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
     231           0 :                      .addReg(BaseReg)
     232           0 :                      .addReg(OffReg)
     233           0 :                      .add(predOps(Pred))
     234           0 :                      .add(condCodeOp());
     235             :     break;
     236             :   }
     237             :   }
     238             : 
     239             :   std::vector<MachineInstr*> NewMIs;
     240           0 :   if (isPre) {
     241           0 :     if (isLoad)
     242           0 :       MemMI =
     243           0 :           BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
     244           0 :               .addReg(WBReg)
     245             :               .addImm(0)
     246           0 :               .addImm(Pred);
     247             :     else
     248           0 :       MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
     249           0 :                   .addReg(MI.getOperand(1).getReg())
     250           0 :                   .addReg(WBReg)
     251           0 :                   .addReg(0)
     252             :                   .addImm(0)
     253           0 :                   .addImm(Pred);
     254           0 :     NewMIs.push_back(MemMI);
     255           0 :     NewMIs.push_back(UpdateMI);
     256             :   } else {
     257           0 :     if (isLoad)
     258           0 :       MemMI =
     259           0 :           BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
     260           0 :               .addReg(BaseReg)
     261             :               .addImm(0)
     262           0 :               .addImm(Pred);
     263             :     else
     264           0 :       MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
     265           0 :                   .addReg(MI.getOperand(1).getReg())
     266           0 :                   .addReg(BaseReg)
     267           0 :                   .addReg(0)
     268             :                   .addImm(0)
     269           0 :                   .addImm(Pred);
     270           0 :     if (WB.isDead())
     271           0 :       UpdateMI->getOperand(0).setIsDead();
     272           0 :     NewMIs.push_back(UpdateMI);
     273           0 :     NewMIs.push_back(MemMI);
     274             :   }
     275             : 
     276             :   // Transfer LiveVariables states, kill / dead info.
     277           0 :   if (LV) {
     278           0 :     for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     279           0 :       MachineOperand &MO = MI.getOperand(i);
     280           0 :       if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
     281             :         unsigned Reg = MO.getReg();
     282             : 
     283           0 :         LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
     284           0 :         if (MO.isDef()) {
     285           0 :           MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
     286           0 :           if (MO.isDead())
     287           0 :             LV->addVirtualRegisterDead(Reg, *NewMI);
     288             :         }
     289           0 :         if (MO.isUse() && MO.isKill()) {
     290           0 :           for (unsigned j = 0; j < 2; ++j) {
     291             :             // Look at the two new MI's in reverse order.
     292           0 :             MachineInstr *NewMI = NewMIs[j];
     293           0 :             if (!NewMI->readsRegister(Reg))
     294           0 :               continue;
     295           0 :             LV->addVirtualRegisterKilled(Reg, *NewMI);
     296           0 :             if (VI.removeKill(MI))
     297           0 :               VI.Kills.push_back(NewMI);
     298           0 :             break;
     299             :           }
     300             :         }
     301             :       }
     302             :     }
     303             :   }
     304             : 
     305           0 :   MachineBasicBlock::iterator MBBI = MI.getIterator();
     306           0 :   MFI->insert(MBBI, NewMIs[1]);
     307           0 :   MFI->insert(MBBI, NewMIs[0]);
     308           0 :   return NewMIs[0];
     309             : }
     310             : 
     311             : // Branch analysis.
     312      281875 : bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     313             :                                      MachineBasicBlock *&TBB,
     314             :                                      MachineBasicBlock *&FBB,
     315             :                                      SmallVectorImpl<MachineOperand> &Cond,
     316             :                                      bool AllowModify) const {
     317      281875 :   TBB = nullptr;
     318      281875 :   FBB = nullptr;
     319             : 
     320      281875 :   MachineBasicBlock::iterator I = MBB.end();
     321      281875 :   if (I == MBB.begin())
     322             :     return false; // Empty blocks are easy.
     323             :   --I;
     324             : 
     325             :   // Walk backwards from the end of the basic block until the branch is
     326             :   // analyzed or we give up.
     327     1205185 :   while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
     328             :     // Flag to be raised on unanalyzeable instructions. This is useful in cases
     329             :     // where we want to clean up on the end of the basic block before we bail
     330             :     // out.
     331             :     bool CantAnalyze = false;
     332             : 
     333             :     // Skip over DEBUG values and predicated nonterminators.
     334      251660 :     while (I->isDebugInstr() || !I->isTerminator()) {
     335       12343 :       if (I == MBB.begin())
     336             :         return false;
     337             :       --I;
     338             :     }
     339             : 
     340      479148 :     if (isIndirectBranchOpcode(I->getOpcode()) ||
     341             :         isJumpTableBranchOpcode(I->getOpcode())) {
     342             :       // Indirect branches and jump tables can't be analyzed, but we still want
     343             :       // to clean up any instructions at the tail of the basic block.
     344             :       CantAnalyze = true;
     345             :     } else if (isUncondBranchOpcode(I->getOpcode())) {
     346       30087 :       TBB = I->getOperand(0).getMBB();
     347             :     } else if (isCondBranchOpcode(I->getOpcode())) {
     348             :       // Bail out if we encounter multiple conditional branches.
     349       72939 :       if (!Cond.empty())
     350             :         return true;
     351             : 
     352             :       assert(!FBB && "FBB should have been null.");
     353       72893 :       FBB = TBB;
     354       72893 :       TBB = I->getOperand(0).getMBB();
     355      145786 :       Cond.push_back(I->getOperand(1));
     356      145786 :       Cond.push_back(I->getOperand(2));
     357      132291 :     } else if (I->isReturn()) {
     358             :       // Returns can't be analyzed, but we should run cleanup.
     359      252318 :       CantAnalyze = !isPredicated(*I);
     360             :     } else {
     361             :       // We encountered other unrecognized terminator. Bail out immediately.
     362             :       return true;
     363             :     }
     364             : 
     365             :     // Cleanup code - to be run for unpredicated unconditional branches and
     366             :     //                returns.
     367      466792 :     if (!isPredicated(*I) &&
     368      318282 :           (isUncondBranchOpcode(I->getOpcode()) ||
     369             :            isIndirectBranchOpcode(I->getOpcode()) ||
     370      124838 :            isJumpTableBranchOpcode(I->getOpcode()) ||
     371             :            I->isReturn())) {
     372             :       // Forget any previous condition branch information - it no longer applies.
     373             :       Cond.clear();
     374      159141 :       FBB = nullptr;
     375             : 
     376             :       // If we can modify the function, delete everything below this
     377             :       // unconditional branch.
     378      159141 :       if (AllowModify) {
     379       61868 :         MachineBasicBlock::iterator DI = std::next(I);
     380       61880 :         while (DI != MBB.end()) {
     381             :           MachineInstr &InstToDelete = *DI;
     382             :           ++DI;
     383           6 :           InstToDelete.eraseFromParent();
     384             :         }
     385             :       }
     386             :     }
     387             : 
     388      233396 :     if (CantAnalyze)
     389             :       return true;
     390             : 
     391      104301 :     if (I == MBB.begin())
     392             :       return false;
     393             : 
     394             :     --I;
     395             :   }
     396             : 
     397             :   // We made it past the terminators without bailing out - we must have
     398             :   // analyzed this branch successfully.
     399             :   return false;
     400             : }
     401             : 
     402        8870 : unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
     403             :                                         int *BytesRemoved) const {
     404             :   assert(!BytesRemoved && "code size not handled");
     405             : 
     406        8870 :   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     407        8870 :   if (I == MBB.end())
     408             :     return 0;
     409             : 
     410       17736 :   if (!isUncondBranchOpcode(I->getOpcode()) &&
     411             :       !isCondBranchOpcode(I->getOpcode()))
     412             :     return 0;
     413             : 
     414             :   // Remove the branch.
     415        8521 :   I->eraseFromParent();
     416             : 
     417        8521 :   I = MBB.end();
     418             : 
     419        8521 :   if (I == MBB.begin()) return 1;
     420             :   --I;
     421       16188 :   if (!isCondBranchOpcode(I->getOpcode()))
     422             :     return 1;
     423             : 
     424             :   // Remove the branch.
     425        2109 :   I->eraseFromParent();
     426        2109 :   return 2;
     427             : }
     428             : 
     429        8181 : unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
     430             :                                         MachineBasicBlock *TBB,
     431             :                                         MachineBasicBlock *FBB,
     432             :                                         ArrayRef<MachineOperand> Cond,
     433             :                                         const DebugLoc &DL,
     434             :                                         int *BytesAdded) const {
     435             :   assert(!BytesAdded && "code size not handled");
     436        8181 :   ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
     437        8181 :   int BOpc   = !AFI->isThumbFunction()
     438        8181 :     ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
     439             :   int BccOpc = !AFI->isThumbFunction()
     440        8181 :     ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
     441        8181 :   bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
     442             : 
     443             :   // Shouldn't be a fall through.
     444             :   assert(TBB && "insertBranch must not be told to insert a fallthrough");
     445             :   assert((Cond.size() == 2 || Cond.size() == 0) &&
     446             :          "ARM branch conditions have two components!");
     447             : 
     448             :   // For conditional branches, we use addOperand to preserve CPSR flags.
     449             : 
     450        8181 :   if (!FBB) {
     451        8020 :     if (Cond.empty()) { // Unconditional branch?
     452        1906 :       if (isThumb)
     453        2470 :         BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
     454             :       else
     455         671 :         BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
     456             :     } else
     457        6114 :       BuildMI(&MBB, DL, get(BccOpc))
     458             :           .addMBB(TBB)
     459        6114 :           .addImm(Cond[0].getImm())
     460             :           .add(Cond[1]);
     461             :     return 1;
     462             :   }
     463             : 
     464             :   // Two-way conditional branch.
     465         161 :   BuildMI(&MBB, DL, get(BccOpc))
     466             :       .addMBB(TBB)
     467         161 :       .addImm(Cond[0].getImm())
     468             :       .add(Cond[1]);
     469         161 :   if (isThumb)
     470         192 :     BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
     471             :   else
     472          65 :     BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
     473             :   return 2;
     474             : }
     475             : 
     476        8479 : bool ARMBaseInstrInfo::
     477             : reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
     478        8479 :   ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
     479        8479 :   Cond[0].setImm(ARMCC::getOppositeCondition(CC));
     480        8479 :   return false;
     481             : }
     482             : 
     483      924089 : bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
     484      924089 :   if (MI.isBundle()) {
     485         787 :     MachineBasicBlock::const_instr_iterator I = MI.getIterator();
     486         787 :     MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
     487        3210 :     while (++I != E && I->isInsideBundle()) {
     488        1574 :       int PIdx = I->findFirstPredOperandIdx();
     489        2392 :       if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
     490             :         return true;
     491             :     }
     492             :     return false;
     493             :   }
     494             : 
     495      923302 :   int PIdx = MI.findFirstPredOperandIdx();
     496     1743486 :   return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
     497             : }
     498             : 
     499         721 : bool ARMBaseInstrInfo::PredicateInstruction(
     500             :     MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
     501         721 :   unsigned Opc = MI.getOpcode();
     502         721 :   if (isUncondBranchOpcode(Opc)) {
     503           0 :     MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
     504           0 :     MachineInstrBuilder(*MI.getParent()->getParent(), MI)
     505           0 :       .addImm(Pred[0].getImm())
     506           0 :       .addReg(Pred[1].getReg());
     507           0 :     return true;
     508             :   }
     509             : 
     510         721 :   int PIdx = MI.findFirstPredOperandIdx();
     511         721 :   if (PIdx != -1) {
     512         721 :     MachineOperand &PMO = MI.getOperand(PIdx);
     513         721 :     PMO.setImm(Pred[0].getImm());
     514        1442 :     MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
     515         721 :     return true;
     516             :   }
     517             :   return false;
     518             : }
     519             : 
     520         109 : bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
     521             :                                          ArrayRef<MachineOperand> Pred2) const {
     522         109 :   if (Pred1.size() > 2 || Pred2.size() > 2)
     523             :     return false;
     524             : 
     525         109 :   ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
     526         109 :   ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
     527         109 :   if (CC1 == CC2)
     528             :     return true;
     529             : 
     530          48 :   switch (CC1) {
     531             :   default:
     532             :     return false;
     533           0 :   case ARMCC::AL:
     534           0 :     return true;
     535           5 :   case ARMCC::HS:
     536           5 :     return CC2 == ARMCC::HI;
     537           6 :   case ARMCC::LS:
     538           6 :     return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
     539           7 :   case ARMCC::GE:
     540           7 :     return CC2 == ARMCC::GT;
     541           2 :   case ARMCC::LE:
     542           2 :     return CC2 == ARMCC::LT;
     543             :   }
     544             : }
     545             : 
     546       38992 : bool ARMBaseInstrInfo::DefinesPredicate(
     547             :     MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
     548             :   bool Found = false;
     549      220205 :   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     550      181213 :     const MachineOperand &MO = MI.getOperand(i);
     551      181803 :     if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
     552      123705 :         (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
     553        2979 :       Pred.push_back(MO);
     554             :       Found = true;
     555             :     }
     556             :   }
     557             : 
     558       38992 :   return Found;
     559             : }
     560             : 
     561          37 : bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
     562         423 :   for (const auto &MO : MI.operands())
     563         274 :     if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
     564             :       return true;
     565             :   return false;
     566             : }
     567             : 
     568           0 : bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
     569             :                                         unsigned Op) const {
     570           0 :   const MachineOperand &Offset = MI.getOperand(Op + 1);
     571           0 :   return Offset.getReg() != 0;
     572             : }
     573             : 
     574             : // Load with negative register offset requires additional 1cyc and +I unit
     575             : // for Cortex A57
     576           0 : bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
     577             :                                              unsigned Op) const {
     578           0 :   const MachineOperand &Offset = MI.getOperand(Op + 1);
     579           0 :   const MachineOperand &Opc = MI.getOperand(Op + 2);
     580             :   assert(Opc.isImm());
     581             :   assert(Offset.isReg());
     582           0 :   int64_t OpcImm = Opc.getImm();
     583             : 
     584           0 :   bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
     585           0 :   return (isSub && Offset.getReg() != 0);
     586             : }
     587             : 
     588           0 : bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
     589             :                                        unsigned Op) const {
     590           0 :   const MachineOperand &Opc = MI.getOperand(Op + 2);
     591           0 :   unsigned OffImm = Opc.getImm();
     592           0 :   return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
     593             : }
     594             : 
     595             : // Load, scaled register offset, not plus LSL2
     596           3 : bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
     597             :                                                   unsigned Op) const {
     598           3 :   const MachineOperand &Opc = MI.getOperand(Op + 2);
     599           3 :   unsigned OffImm = Opc.getImm();
     600             : 
     601           3 :   bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
     602             :   unsigned Amt = ARM_AM::getAM2Offset(OffImm);
     603             :   ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
     604           3 :   if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
     605           3 :   bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
     606           3 :   return !SimpleScaled;
     607             : }
     608             : 
     609             : // Minus reg for ldstso addr mode
     610           3 : bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
     611             :                                         unsigned Op) const {
     612           6 :   unsigned OffImm = MI.getOperand(Op + 2).getImm();
     613           3 :   return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
     614             : }
     615             : 
     616             : // Load, scaled register offset
     617           0 : bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
     618             :                                       unsigned Op) const {
     619           0 :   unsigned OffImm = MI.getOperand(Op + 2).getImm();
     620           0 :   return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
     621             : }
     622             : 
     623       34291 : static bool isEligibleForITBlock(const MachineInstr *MI) {
     624       68582 :   switch (MI->getOpcode()) {
     625             :   default: return true;
     626          37 :   case ARM::tADC:   // ADC (register) T1
     627             :   case ARM::tADDi3: // ADD (immediate) T1
     628             :   case ARM::tADDi8: // ADD (immediate) T2
     629             :   case ARM::tADDrr: // ADD (register) T1
     630             :   case ARM::tAND:   // AND (register) T1
     631             :   case ARM::tASRri: // ASR (immediate) T1
     632             :   case ARM::tASRrr: // ASR (register) T1
     633             :   case ARM::tBIC:   // BIC (register) T1
     634             :   case ARM::tEOR:   // EOR (register) T1
     635             :   case ARM::tLSLri: // LSL (immediate) T1
     636             :   case ARM::tLSLrr: // LSL (register) T1
     637             :   case ARM::tLSRri: // LSR (immediate) T1
     638             :   case ARM::tLSRrr: // LSR (register) T1
     639             :   case ARM::tMUL:   // MUL T1
     640             :   case ARM::tMVN:   // MVN (register) T1
     641             :   case ARM::tORR:   // ORR (register) T1
     642             :   case ARM::tROR:   // ROR (register) T1
     643             :   case ARM::tRSB:   // RSB (immediate) T1
     644             :   case ARM::tSBC:   // SBC (register) T1
     645             :   case ARM::tSUBi3: // SUB (immediate) T1
     646             :   case ARM::tSUBi8: // SUB (immediate) T2
     647             :   case ARM::tSUBrr: // SUB (register) T1
     648          37 :     return !ARMBaseInstrInfo::isCPSRDefined(*MI);
     649             :   }
     650             : }
     651             : 
     652             : /// isPredicable - Return true if the specified instruction can be predicated.
     653             : /// By default, this returns true for every instruction with a
     654             : /// PredicateOperand.
     655       38975 : bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
     656       38975 :   if (!MI.isPredicable())
     657             :     return false;
     658             : 
     659       34320 :   if (MI.isBundle())
     660             :     return false;
     661             : 
     662       34291 :   if (!isEligibleForITBlock(&MI))
     663             :     return false;
     664             : 
     665             :   const ARMFunctionInfo *AFI =
     666       34286 :       MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
     667             : 
     668             :   // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
     669             :   // In their ARM encoding, they can't be encoded in a conditional form.
     670       34286 :   if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
     671             :     return false;
     672             : 
     673       32102 :   if (AFI->isThumb2Function()) {
     674       12626 :     if (getSubtarget().restrictIT())
     675         699 :       return isV8EligibleForIT(&MI);
     676             :   }
     677             : 
     678             :   return true;
     679             : }
     680             : 
     681             : namespace llvm {
     682             : 
     683          62 : template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
     684         400 :   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     685         338 :     const MachineOperand &MO = MI->getOperand(i);
     686         788 :     if (!MO.isReg() || MO.isUndef() || MO.isUse())
     687         214 :       continue;
     688         124 :     if (MO.getReg() != ARM::CPSR)
     689          62 :       continue;
     690          62 :     if (!MO.isDead())
     691             :       return false;
     692             :   }
     693             :   // all definitions of CPSR are dead
     694             :   return true;
     695             : }
     696             : 
     697             : } // end namespace llvm
     698             : 
     699             : /// GetInstSize - Return the size of the specified MachineInstr.
     700             : ///
     701      880442 : unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     702      880442 :   const MachineBasicBlock &MBB = *MI.getParent();
     703      880442 :   const MachineFunction *MF = MBB.getParent();
     704      880442 :   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
     705             : 
     706      880442 :   const MCInstrDesc &MCID = MI.getDesc();
     707     1760884 :   if (MCID.getSize())
     708             :     return MCID.getSize();
     709             : 
     710             :   // If this machine instr is an inline asm, measure it.
     711      306193 :   if (MI.getOpcode() == ARM::INLINEASM)
     712      172297 :     return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
     713             :   unsigned Opc = MI.getOpcode();
     714      133896 :   switch (Opc) {
     715             :   default:
     716             :     // pseudo-instruction sizes are zero.
     717             :     return 0;
     718           0 :   case TargetOpcode::BUNDLE:
     719           0 :     return getInstBundleLength(MI);
     720        2026 :   case ARM::MOVi16_ga_pcrel:
     721             :   case ARM::MOVTi16_ga_pcrel:
     722             :   case ARM::t2MOVi16_ga_pcrel:
     723             :   case ARM::t2MOVTi16_ga_pcrel:
     724        2026 :     return 4;
     725          11 :   case ARM::MOVi32imm:
     726             :   case ARM::t2MOVi32imm:
     727          11 :     return 8;
     728       23023 :   case ARM::CONSTPOOL_ENTRY:
     729             :   case ARM::JUMPTABLE_INSTS:
     730             :   case ARM::JUMPTABLE_ADDRS:
     731             :   case ARM::JUMPTABLE_TBB:
     732             :   case ARM::JUMPTABLE_TBH:
     733             :     // If this machine instr is a constant pool entry, its size is recorded as
     734             :     // operand #2.
     735       23023 :     return MI.getOperand(2).getImm();
     736           4 :   case ARM::Int_eh_sjlj_longjmp:
     737           4 :     return 16;
     738           2 :   case ARM::tInt_eh_sjlj_longjmp:
     739           2 :     return 10;
     740           3 :   case ARM::tInt_WIN_eh_sjlj_longjmp:
     741           3 :     return 12;
     742           7 :   case ARM::Int_eh_sjlj_setjmp:
     743             :   case ARM::Int_eh_sjlj_setjmp_nofp:
     744           7 :     return 20;
     745          13 :   case ARM::tInt_eh_sjlj_setjmp:
     746             :   case ARM::t2Int_eh_sjlj_setjmp:
     747             :   case ARM::t2Int_eh_sjlj_setjmp_nofp:
     748          13 :     return 12;
     749          41 :   case ARM::SPACE:
     750          41 :     return MI.getOperand(1).getImm();
     751             :   }
     752             : }
     753             : 
     754           0 : unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
     755             :   unsigned Size = 0;
     756           0 :   MachineBasicBlock::const_instr_iterator I = MI.getIterator();
     757           0 :   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
     758           0 :   while (++I != E && I->isInsideBundle()) {
     759             :     assert(!I->isBundle() && "No nested bundle!");
     760           0 :     Size += getInstSizeInBytes(*I);
     761             :   }
     762           0 :   return Size;
     763             : }
     764             : 
     765           4 : void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
     766             :                                     MachineBasicBlock::iterator I,
     767             :                                     unsigned DestReg, bool KillSrc,
     768             :                                     const ARMSubtarget &Subtarget) const {
     769           4 :   unsigned Opc = Subtarget.isThumb()
     770           4 :                      ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
     771             :                      : ARM::MRS;
     772             : 
     773             :   MachineInstrBuilder MIB =
     774           8 :       BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
     775             : 
     776             :   // There is only 1 A/R class MRS instruction, and it always refers to
     777             :   // APSR. However, there are lots of other possibilities on M-class cores.
     778           4 :   if (Subtarget.isMClass())
     779             :     MIB.addImm(0x800);
     780             : 
     781           4 :   MIB.add(predOps(ARMCC::AL))
     782           4 :      .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
     783           4 : }
     784             : 
     785           4 : void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
     786             :                                   MachineBasicBlock::iterator I,
     787             :                                   unsigned SrcReg, bool KillSrc,
     788             :                                   const ARMSubtarget &Subtarget) const {
     789           4 :   unsigned Opc = Subtarget.isThumb()
     790           4 :                      ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
     791             :                      : ARM::MSR;
     792             : 
     793           8 :   MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
     794             : 
     795           4 :   if (Subtarget.isMClass())
     796             :     MIB.addImm(0x800);
     797             :   else
     798             :     MIB.addImm(8);
     799             : 
     800           4 :   MIB.addReg(SrcReg, getKillRegState(KillSrc))
     801           4 :      .add(predOps(ARMCC::AL))
     802           4 :      .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
     803           4 : }
     804             : 
     805        8226 : void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     806             :                                    MachineBasicBlock::iterator I,
     807             :                                    const DebugLoc &DL, unsigned DestReg,
     808             :                                    unsigned SrcReg, bool KillSrc) const {
     809        8226 :   bool GPRDest = ARM::GPRRegClass.contains(DestReg);
     810             :   bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
     811             : 
     812        8226 :   if (GPRDest && GPRSrc) {
     813       14205 :     BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
     814        4735 :         .addReg(SrcReg, getKillRegState(KillSrc))
     815        4735 :         .add(predOps(ARMCC::AL))
     816        4735 :         .add(condCodeOp());
     817       12947 :     return;
     818             :   }
     819             : 
     820        3491 :   bool SPRDest = ARM::SPRRegClass.contains(DestReg);
     821             :   bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
     822             : 
     823             :   unsigned Opc = 0;
     824        3491 :   if (SPRDest && SPRSrc)
     825             :     Opc = ARM::VMOVS;
     826        2157 :   else if (GPRDest && SPRSrc)
     827             :     Opc = ARM::VMOVRS;
     828        1852 :   else if (SPRDest && GPRSrc)
     829             :     Opc = ARM::VMOVSR;
     830        3044 :   else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
     831             :     Opc = ARM::VMOVD;
     832         834 :   else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
     833             :     Opc = ARM::VORRq;
     834             : 
     835             :   if (Opc) {
     836        6938 :     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
     837        3469 :     MIB.addReg(SrcReg, getKillRegState(KillSrc));
     838        3469 :     if (Opc == ARM::VORRq)
     839         395 :       MIB.addReg(SrcReg, getKillRegState(KillSrc));
     840        3469 :     MIB.add(predOps(ARMCC::AL));
     841             :     return;
     842             :   }
     843             : 
     844             :   // Handle register classes that require multiple instructions.
     845             :   unsigned BeginIdx = 0;
     846             :   unsigned SubRegs = 0;
     847             :   int Spacing = 1;
     848             : 
     849             :   // Use VORRq when possible.
     850          44 :   if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
     851             :     Opc = ARM::VORRq;
     852             :     BeginIdx = ARM::qsub_0;
     853             :     SubRegs = 2;
     854          44 :   } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
     855             :     Opc = ARM::VORRq;
     856             :     BeginIdx = ARM::qsub_0;
     857             :     SubRegs = 4;
     858             :   // Fall back to VMOVD.
     859          34 :   } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
     860             :     Opc = ARM::VMOVD;
     861             :     BeginIdx = ARM::dsub_0;
     862             :     SubRegs = 2;
     863          26 :   } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
     864             :     Opc = ARM::VMOVD;
     865             :     BeginIdx = ARM::dsub_0;
     866             :     SubRegs = 3;
     867          26 :   } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
     868             :     Opc = ARM::VMOVD;
     869             :     BeginIdx = ARM::dsub_0;
     870             :     SubRegs = 4;
     871          26 :   } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
     872           2 :     Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
     873             :     BeginIdx = ARM::gsub_0;
     874             :     SubRegs = 2;
     875          22 :   } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
     876             :     Opc = ARM::VMOVD;
     877             :     BeginIdx = ARM::dsub_0;
     878             :     SubRegs = 2;
     879             :     Spacing = 2;
     880          22 :   } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
     881             :     Opc = ARM::VMOVD;
     882             :     BeginIdx = ARM::dsub_0;
     883             :     SubRegs = 3;
     884             :     Spacing = 2;
     885          22 :   } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
     886             :     Opc = ARM::VMOVD;
     887             :     BeginIdx = ARM::dsub_0;
     888             :     SubRegs = 4;
     889             :     Spacing = 2;
     890          11 :   } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
     891             :     Opc = ARM::VMOVS;
     892             :     BeginIdx = ARM::ssub_0;
     893             :     SubRegs = 2;
     894           8 :   } else if (SrcReg == ARM::CPSR) {
     895           4 :     copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
     896           4 :     return;
     897           4 :   } else if (DestReg == ARM::CPSR) {
     898           4 :     copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
     899           4 :     return;
     900             :   }
     901             : 
     902             :   assert(Opc && "Impossible reg-to-reg copy");
     903             : 
     904          14 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
     905          14 :   MachineInstrBuilder Mov;
     906             : 
     907             :   // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
     908          14 :   if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
     909           0 :     BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
     910           0 :     Spacing = -Spacing;
     911             :   }
     912             : #ifndef NDEBUG
     913             :   SmallSet<unsigned, 4> DstRegs;
     914             : #endif
     915          90 :   for (unsigned i = 0; i != SubRegs; ++i) {
     916          38 :     unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
     917          38 :     unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
     918             :     assert(Dst && Src && "Bad sub-register");
     919             : #ifndef NDEBUG
     920             :     assert(!DstRegs.count(Src) && "destructive vector copy");
     921             :     DstRegs.insert(Dst);
     922             : #endif
     923          76 :     Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
     924             :     // VORR takes two source operands.
     925          38 :     if (Opc == ARM::VORRq)
     926          20 :       Mov.addReg(Src);
     927          38 :     Mov = Mov.add(predOps(ARMCC::AL));
     928             :     // MOVr can set CC.
     929          38 :     if (Opc == ARM::MOVr)
     930           2 :       Mov = Mov.add(condCodeOp());
     931             :   }
     932             :   // Add implicit super-register defs and kills to the last instruction.
     933          14 :   Mov->addRegisterDefined(DestReg, TRI);
     934          14 :   if (KillSrc)
     935           1 :     Mov->addRegisterKilled(SrcReg, TRI);
     936             : }
     937             : 
     938           0 : bool ARMBaseInstrInfo::isCopyInstr(const MachineInstr &MI,
     939             :                                    const MachineOperand *&Src,
     940             :                                    const MachineOperand *&Dest) const {
     941             :   // VMOVRRD is also a copy instruction but it requires
     942             :   // special way of handling. It is more complex copy version
     943             :   // and since that we are not considering it. For recognition
     944             :   // of such instruction isExtractSubregLike MI interface fuction
     945             :   // could be used.
     946             :   // VORRq is considered as a move only if two inputs are
     947             :   // the same register.
     948           0 :   if (!MI.isMoveReg() ||
     949           0 :       (MI.getOpcode() == ARM::VORRq &&
     950           0 :        MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
     951             :     return false;
     952           0 :   Dest = &MI.getOperand(0);
     953           0 :   Src = &MI.getOperand(1);
     954           0 :   return true;
     955             : }
     956             : 
     957             : const MachineInstrBuilder &
     958          70 : ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
     959             :                           unsigned SubIdx, unsigned State,
     960             :                           const TargetRegisterInfo *TRI) const {
     961          70 :   if (!SubIdx)
     962           0 :     return MIB.addReg(Reg, State);
     963             : 
     964          70 :   if (TargetRegisterInfo::isPhysicalRegister(Reg))
     965           8 :     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
     966          62 :   return MIB.addReg(Reg, State, SubIdx);
     967             : }
     968             : 
     969        2344 : void ARMBaseInstrInfo::
     970             : storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     971             :                     unsigned SrcReg, bool isKill, int FI,
     972             :                     const TargetRegisterClass *RC,
     973             :                     const TargetRegisterInfo *TRI) const {
     974        2344 :   DebugLoc DL;
     975        2344 :   if (I != MBB.end()) DL = I->getDebugLoc();
     976        2344 :   MachineFunction &MF = *MBB.getParent();
     977        2344 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     978             :   unsigned Align = MFI.getObjectAlignment(FI);
     979             : 
     980        4688 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
     981             :       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
     982        2344 :       MFI.getObjectSize(FI), Align);
     983             : 
     984        2344 :   switch (TRI->getSpillSize(*RC)) {
     985           2 :     case 2:
     986           4 :       if (ARM::HPRRegClass.hasSubClassEq(RC)) {
     987           6 :         BuildMI(MBB, I, DL, get(ARM::VSTRH))
     988           2 :             .addReg(SrcReg, getKillRegState(isKill))
     989             :             .addFrameIndex(FI)
     990             :             .addImm(0)
     991             :             .addMemOperand(MMO)
     992           2 :             .add(predOps(ARMCC::AL));
     993             :       } else
     994           0 :         llvm_unreachable("Unknown reg class!");
     995           2 :       break;
     996        1686 :     case 4:
     997        3372 :       if (ARM::GPRRegClass.hasSubClassEq(RC)) {
     998        4590 :         BuildMI(MBB, I, DL, get(ARM::STRi12))
     999        1530 :             .addReg(SrcReg, getKillRegState(isKill))
    1000             :             .addFrameIndex(FI)
    1001             :             .addImm(0)
    1002             :             .addMemOperand(MMO)
    1003        1530 :             .add(predOps(ARMCC::AL));
    1004         312 :       } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
    1005         468 :         BuildMI(MBB, I, DL, get(ARM::VSTRS))
    1006         156 :             .addReg(SrcReg, getKillRegState(isKill))
    1007             :             .addFrameIndex(FI)
    1008             :             .addImm(0)
    1009             :             .addMemOperand(MMO)
    1010         156 :             .add(predOps(ARMCC::AL));
    1011             :       } else
    1012           0 :         llvm_unreachable("Unknown reg class!");
    1013             :       break;
    1014         257 :     case 8:
    1015         514 :       if (ARM::DPRRegClass.hasSubClassEq(RC)) {
    1016         750 :         BuildMI(MBB, I, DL, get(ARM::VSTRD))
    1017         250 :             .addReg(SrcReg, getKillRegState(isKill))
    1018             :             .addFrameIndex(FI)
    1019             :             .addImm(0)
    1020             :             .addMemOperand(MMO)
    1021         250 :             .add(predOps(ARMCC::AL));
    1022          14 :       } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
    1023           7 :         if (Subtarget.hasV5TEOps()) {
    1024          10 :           MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
    1025           5 :           AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
    1026           5 :           AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
    1027           5 :           MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
    1028           5 :              .add(predOps(ARMCC::AL));
    1029             :         } else {
    1030             :           // Fallback to STM instruction, which has existed since the dawn of
    1031             :           // time.
    1032           4 :           MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
    1033             :                                         .addFrameIndex(FI)
    1034             :                                         .addMemOperand(MMO)
    1035           2 :                                         .add(predOps(ARMCC::AL));
    1036           2 :           AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
    1037           2 :           AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
    1038             :         }
    1039             :       } else
    1040           0 :         llvm_unreachable("Unknown reg class!");
    1041             :       break;
    1042         396 :     case 16:
    1043         792 :       if (ARM::DPairRegClass.hasSubClassEq(RC)) {
    1044             :         // Use aligned spills if the stack can be realigned.
    1045         396 :         if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1046        1167 :           BuildMI(MBB, I, DL, get(ARM::VST1q64))
    1047             :               .addFrameIndex(FI)
    1048             :               .addImm(16)
    1049         389 :               .addReg(SrcReg, getKillRegState(isKill))
    1050             :               .addMemOperand(MMO)
    1051         389 :               .add(predOps(ARMCC::AL));
    1052             :         } else {
    1053          21 :           BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
    1054           7 :               .addReg(SrcReg, getKillRegState(isKill))
    1055             :               .addFrameIndex(FI)
    1056             :               .addMemOperand(MMO)
    1057           7 :               .add(predOps(ARMCC::AL));
    1058             :         }
    1059             :       } else
    1060           0 :         llvm_unreachable("Unknown reg class!");
    1061             :       break;
    1062           1 :     case 24:
    1063           2 :       if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
    1064             :         // Use aligned spills if the stack can be realigned.
    1065           1 :         if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1066           0 :           BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
    1067             :               .addFrameIndex(FI)
    1068             :               .addImm(16)
    1069           0 :               .addReg(SrcReg, getKillRegState(isKill))
    1070             :               .addMemOperand(MMO)
    1071           0 :               .add(predOps(ARMCC::AL));
    1072             :         } else {
    1073           2 :           MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
    1074             :                                         .addFrameIndex(FI)
    1075           1 :                                         .add(predOps(ARMCC::AL))
    1076           1 :                                         .addMemOperand(MMO);
    1077           1 :           MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
    1078           1 :           MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
    1079           1 :           AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
    1080             :         }
    1081             :       } else
    1082           0 :         llvm_unreachable("Unknown reg class!");
    1083             :       break;
    1084           0 :     case 32:
    1085           0 :       if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
    1086           0 :         if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1087             :           // FIXME: It's possible to only store part of the QQ register if the
    1088             :           // spilled def has a sub-register index.
    1089           0 :           BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
    1090             :               .addFrameIndex(FI)
    1091             :               .addImm(16)
    1092           0 :               .addReg(SrcReg, getKillRegState(isKill))
    1093             :               .addMemOperand(MMO)
    1094           0 :               .add(predOps(ARMCC::AL));
    1095             :         } else {
    1096           0 :           MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
    1097             :                                         .addFrameIndex(FI)
    1098           0 :                                         .add(predOps(ARMCC::AL))
    1099           0 :                                         .addMemOperand(MMO);
    1100           0 :           MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
    1101           0 :           MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
    1102           0 :           MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
    1103           0 :                 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    1104             :         }
    1105             :       } else
    1106           0 :         llvm_unreachable("Unknown reg class!");
    1107             :       break;
    1108           2 :     case 64:
    1109           4 :       if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
    1110           4 :         MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
    1111             :                                       .addFrameIndex(FI)
    1112           2 :                                       .add(predOps(ARMCC::AL))
    1113           2 :                                       .addMemOperand(MMO);
    1114           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
    1115           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
    1116           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
    1117           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    1118           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
    1119           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
    1120           2 :         MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
    1121           2 :               AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    1122             :       } else
    1123           0 :         llvm_unreachable("Unknown reg class!");
    1124           2 :       break;
    1125           0 :     default:
    1126           0 :       llvm_unreachable("Unknown reg class!");
    1127             :   }
    1128        2344 : }
    1129             : 
    1130       12590 : unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
    1131             :                                               int &FrameIndex) const {
    1132       25180 :   switch (MI.getOpcode()) {
    1133             :   default: break;
    1134          17 :   case ARM::STRrs:
    1135             :   case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    1136          17 :     if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
    1137          17 :         MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
    1138           0 :         MI.getOperand(3).getImm() == 0) {
    1139           0 :       FrameIndex = MI.getOperand(1).getIndex();
    1140           0 :       return MI.getOperand(0).getReg();
    1141             :     }
    1142             :     break;
    1143        1082 :   case ARM::STRi12:
    1144             :   case ARM::t2STRi12:
    1145             :   case ARM::tSTRspi:
    1146             :   case ARM::VSTRD:
    1147             :   case ARM::VSTRS:
    1148        2880 :     if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
    1149         358 :         MI.getOperand(2).getImm() == 0) {
    1150         320 :       FrameIndex = MI.getOperand(1).getIndex();
    1151         320 :       return MI.getOperand(0).getReg();
    1152             :     }
    1153             :     break;
    1154          12 :   case ARM::VST1q64:
    1155             :   case ARM::VST1d64TPseudo:
    1156             :   case ARM::VST1d64QPseudo:
    1157          24 :     if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
    1158           0 :       FrameIndex = MI.getOperand(0).getIndex();
    1159           0 :       return MI.getOperand(2).getReg();
    1160             :     }
    1161             :     break;
    1162           0 :   case ARM::VSTMQIA:
    1163           0 :     if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
    1164           0 :       FrameIndex = MI.getOperand(1).getIndex();
    1165           0 :       return MI.getOperand(0).getReg();
    1166             :     }
    1167             :     break;
    1168             :   }
    1169             : 
    1170             :   return 0;
    1171             : }
    1172             : 
    1173      129516 : unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
    1174             :                                                     int &FrameIndex) const {
    1175             :   const MachineMemOperand *Dummy;
    1176      129516 :   return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
    1177             : }
    1178             : 
    1179        2018 : void ARMBaseInstrInfo::
    1180             : loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    1181             :                      unsigned DestReg, int FI,
    1182             :                      const TargetRegisterClass *RC,
    1183             :                      const TargetRegisterInfo *TRI) const {
    1184        2018 :   DebugLoc DL;
    1185        2018 :   if (I != MBB.end()) DL = I->getDebugLoc();
    1186        2018 :   MachineFunction &MF = *MBB.getParent();
    1187        2018 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    1188             :   unsigned Align = MFI.getObjectAlignment(FI);
    1189        4036 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
    1190             :       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
    1191        2018 :       MFI.getObjectSize(FI), Align);
    1192             : 
    1193        2018 :   switch (TRI->getSpillSize(*RC)) {
    1194           2 :   case 2:
    1195           4 :     if (ARM::HPRRegClass.hasSubClassEq(RC)) {
    1196           4 :       BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
    1197             :           .addFrameIndex(FI)
    1198             :           .addImm(0)
    1199             :           .addMemOperand(MMO)
    1200           2 :           .add(predOps(ARMCC::AL));
    1201             :     } else
    1202           0 :       llvm_unreachable("Unknown reg class!");
    1203           2 :     break;
    1204        1192 :   case 4:
    1205        2384 :     if (ARM::GPRRegClass.hasSubClassEq(RC)) {
    1206        2352 :       BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
    1207             :           .addFrameIndex(FI)
    1208             :           .addImm(0)
    1209             :           .addMemOperand(MMO)
    1210        1176 :           .add(predOps(ARMCC::AL));
    1211          32 :     } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
    1212          32 :       BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
    1213             :           .addFrameIndex(FI)
    1214             :           .addImm(0)
    1215             :           .addMemOperand(MMO)
    1216          16 :           .add(predOps(ARMCC::AL));
    1217             :     } else
    1218           0 :       llvm_unreachable("Unknown reg class!");
    1219             :     break;
    1220         434 :   case 8:
    1221         868 :     if (ARM::DPRRegClass.hasSubClassEq(RC)) {
    1222         860 :       BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
    1223             :           .addFrameIndex(FI)
    1224             :           .addImm(0)
    1225             :           .addMemOperand(MMO)
    1226         430 :           .add(predOps(ARMCC::AL));
    1227           8 :     } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
    1228           4 :       MachineInstrBuilder MIB;
    1229             : 
    1230           4 :       if (Subtarget.hasV5TEOps()) {
    1231           4 :         MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
    1232           2 :         AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
    1233           2 :         AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
    1234           2 :         MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
    1235           2 :            .add(predOps(ARMCC::AL));
    1236             :       } else {
    1237             :         // Fallback to LDM instruction, which has existed since the dawn of
    1238             :         // time.
    1239           6 :         MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
    1240             :                   .addFrameIndex(FI)
    1241             :                   .addMemOperand(MMO)
    1242           2 :                   .add(predOps(ARMCC::AL));
    1243           2 :         MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
    1244           2 :         MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
    1245             :       }
    1246             : 
    1247           4 :       if (TargetRegisterInfo::isPhysicalRegister(DestReg))
    1248           0 :         MIB.addReg(DestReg, RegState::ImplicitDefine);
    1249             :     } else
    1250           0 :       llvm_unreachable("Unknown reg class!");
    1251             :     break;
    1252         387 :   case 16:
    1253         774 :     if (ARM::DPairRegClass.hasSubClassEq(RC)) {
    1254         387 :       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1255         770 :         BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
    1256             :             .addFrameIndex(FI)
    1257             :             .addImm(16)
    1258             :             .addMemOperand(MMO)
    1259         385 :             .add(predOps(ARMCC::AL));
    1260             :       } else {
    1261           4 :         BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
    1262             :             .addFrameIndex(FI)
    1263             :             .addMemOperand(MMO)
    1264           2 :             .add(predOps(ARMCC::AL));
    1265             :       }
    1266             :     } else
    1267           0 :       llvm_unreachable("Unknown reg class!");
    1268             :     break;
    1269           1 :   case 24:
    1270           2 :     if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
    1271           1 :       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1272           0 :         BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
    1273             :             .addFrameIndex(FI)
    1274             :             .addImm(16)
    1275             :             .addMemOperand(MMO)
    1276           0 :             .add(predOps(ARMCC::AL));
    1277             :       } else {
    1278           2 :         MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
    1279             :                                       .addFrameIndex(FI)
    1280             :                                       .addMemOperand(MMO)
    1281           1 :                                       .add(predOps(ARMCC::AL));
    1282           1 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
    1283           1 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
    1284           1 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
    1285           1 :         if (TargetRegisterInfo::isPhysicalRegister(DestReg))
    1286           0 :           MIB.addReg(DestReg, RegState::ImplicitDefine);
    1287             :       }
    1288             :     } else
    1289           0 :       llvm_unreachable("Unknown reg class!");
    1290             :     break;
    1291           0 :    case 32:
    1292           0 :     if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
    1293           0 :       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
    1294           0 :         BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
    1295             :             .addFrameIndex(FI)
    1296             :             .addImm(16)
    1297             :             .addMemOperand(MMO)
    1298           0 :             .add(predOps(ARMCC::AL));
    1299             :       } else {
    1300           0 :         MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
    1301             :                                       .addFrameIndex(FI)
    1302           0 :                                       .add(predOps(ARMCC::AL))
    1303           0 :                                       .addMemOperand(MMO);
    1304           0 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
    1305           0 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
    1306           0 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
    1307           0 :         MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
    1308           0 :         if (TargetRegisterInfo::isPhysicalRegister(DestReg))
    1309           0 :           MIB.addReg(DestReg, RegState::ImplicitDefine);
    1310             :       }
    1311             :     } else
    1312           0 :       llvm_unreachable("Unknown reg class!");
    1313             :     break;
    1314           2 :   case 64:
    1315           4 :     if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
    1316           4 :       MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
    1317             :                                     .addFrameIndex(FI)
    1318           2 :                                     .add(predOps(ARMCC::AL))
    1319           2 :                                     .addMemOperand(MMO);
    1320           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
    1321           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
    1322           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
    1323           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
    1324           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
    1325           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
    1326           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
    1327           2 :       MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
    1328           2 :       if (TargetRegisterInfo::isPhysicalRegister(DestReg))
    1329           0 :         MIB.addReg(DestReg, RegState::ImplicitDefine);
    1330             :     } else
    1331           0 :       llvm_unreachable("Unknown reg class!");
    1332           2 :     break;
    1333           0 :   default:
    1334           0 :     llvm_unreachable("Unknown regclass!");
    1335             :   }
    1336        2018 : }
    1337             : 
    1338       33403 : unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
    1339             :                                                int &FrameIndex) const {
    1340       66806 :   switch (MI.getOpcode()) {
    1341             :   default: break;
    1342         136 :   case ARM::LDRrs:
    1343             :   case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
    1344         136 :     if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
    1345         136 :         MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
    1346           0 :         MI.getOperand(3).getImm() == 0) {
    1347           0 :       FrameIndex = MI.getOperand(1).getIndex();
    1348           0 :       return MI.getOperand(0).getReg();
    1349             :     }
    1350             :     break;
    1351        7927 :   case ARM::LDRi12:
    1352             :   case ARM::t2LDRi12:
    1353             :   case ARM::tLDRspi:
    1354             :   case ARM::VLDRD:
    1355             :   case ARM::VLDRS:
    1356       24778 :     if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
    1357        4462 :         MI.getOperand(2).getImm() == 0) {
    1358        2692 :       FrameIndex = MI.getOperand(1).getIndex();
    1359        2692 :       return MI.getOperand(0).getReg();
    1360             :     }
    1361             :     break;
    1362           8 :   case ARM::VLD1q64:
    1363             :   case ARM::VLD1d8TPseudo:
    1364             :   case ARM::VLD1d16TPseudo:
    1365             :   case ARM::VLD1d32TPseudo:
    1366             :   case ARM::VLD1d64TPseudo:
    1367             :   case ARM::VLD1d8QPseudo:
    1368             :   case ARM::VLD1d16QPseudo:
    1369             :   case ARM::VLD1d32QPseudo:
    1370             :   case ARM::VLD1d64QPseudo:
    1371          18 :     if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
    1372           2 :       FrameIndex = MI.getOperand(1).getIndex();
    1373           2 :       return MI.getOperand(0).getReg();
    1374             :     }
    1375             :     break;
    1376           0 :   case ARM::VLDMQIA:
    1377           0 :     if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
    1378           0 :       FrameIndex = MI.getOperand(1).getIndex();
    1379           0 :       return MI.getOperand(0).getReg();
    1380             :     }
    1381             :     break;
    1382             :   }
    1383             : 
    1384             :   return 0;
    1385             : }
    1386             : 
    1387      132727 : unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
    1388             :                                                      int &FrameIndex) const {
    1389             :   const MachineMemOperand *Dummy;
    1390      132727 :   return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
    1391             : }
    1392             : 
    1393             : /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
    1394             : /// depending on whether the result is used.
    1395          56 : void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
    1396          56 :   bool isThumb1 = Subtarget.isThumb1Only();
    1397             :   bool isThumb2 = Subtarget.isThumb2();
    1398          56 :   const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
    1399             : 
    1400             :   DebugLoc dl = MI->getDebugLoc();
    1401          56 :   MachineBasicBlock *BB = MI->getParent();
    1402             : 
    1403          56 :   MachineInstrBuilder LDM, STM;
    1404          84 :   if (isThumb1 || !MI->getOperand(1).isDead()) {
    1405          41 :     MachineOperand LDWb(MI->getOperand(1));
    1406         116 :     LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
    1407             :                                                  : isThumb1 ? ARM::tLDMIA_UPD
    1408          82 :                                                             : ARM::LDMIA_UPD))
    1409             :               .add(LDWb);
    1410             :   } else {
    1411          45 :     LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
    1412             :   }
    1413             : 
    1414          84 :   if (isThumb1 || !MI->getOperand(0).isDead()) {
    1415          41 :     MachineOperand STWb(MI->getOperand(0));
    1416         116 :     STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
    1417             :                                                  : isThumb1 ? ARM::tSTMIA_UPD
    1418          82 :                                                             : ARM::STMIA_UPD))
    1419             :               .add(STWb);
    1420             :   } else {
    1421          45 :     STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
    1422             :   }
    1423             : 
    1424          56 :   MachineOperand LDBase(MI->getOperand(3));
    1425          56 :   LDM.add(LDBase).add(predOps(ARMCC::AL));
    1426             : 
    1427          56 :   MachineOperand STBase(MI->getOperand(2));
    1428          56 :   STM.add(STBase).add(predOps(ARMCC::AL));
    1429             : 
    1430             :   // Sort the scratch registers into ascending order.
    1431          56 :   const TargetRegisterInfo &TRI = getRegisterInfo();
    1432             :   SmallVector<unsigned, 6> ScratchRegs;
    1433         514 :   for(unsigned I = 5; I < MI->getNumOperands(); ++I)
    1434         458 :     ScratchRegs.push_back(MI->getOperand(I).getReg());
    1435             :   llvm::sort(ScratchRegs.begin(), ScratchRegs.end(),
    1436             :              [&TRI](const unsigned &Reg1,
    1437         173 :                     const unsigned &Reg2) -> bool {
    1438         173 :                return TRI.getEncodingValue(Reg1) <
    1439             :                       TRI.getEncodingValue(Reg2);
    1440             :              });
    1441             : 
    1442         514 :   for (const auto &Reg : ScratchRegs) {
    1443         229 :     LDM.addReg(Reg, RegState::Define);
    1444         229 :     STM.addReg(Reg, RegState::Kill);
    1445             :   }
    1446             : 
    1447          56 :   BB->erase(MI);
    1448          56 : }
    1449             : 
    1450       49807 : bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    1451       99614 :   if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
    1452             :     assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
    1453             :            "LOAD_STACK_GUARD currently supported only for MachO.");
    1454         120 :     expandLoadStackGuard(MI);
    1455         120 :     MI.getParent()->erase(MI);
    1456          60 :     return true;
    1457             :   }
    1458             : 
    1459       49747 :   if (MI.getOpcode() == ARM::MEMCPY) {
    1460          56 :     expandMEMCPY(MI);
    1461          56 :     return true;
    1462             :   }
    1463             : 
    1464             :   // This hook gets to expand COPY instructions before they become
    1465             :   // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
    1466             :   // widened to VMOVD.  We prefer the VMOVD when possible because it may be
    1467             :   // changed into a VORR that can go down the NEON pipeline.
    1468       49691 :   if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
    1469             :     return false;
    1470             : 
    1471             :   // Look for a copy between even S-registers.  That is where we keep floats
    1472             :   // when using NEON v2f32 instructions for f32 arithmetic.
    1473       11470 :   unsigned DstRegS = MI.getOperand(0).getReg();
    1474       11470 :   unsigned SrcRegS = MI.getOperand(1).getReg();
    1475       22940 :   if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    1476             :     return false;
    1477             : 
    1478        1333 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    1479             :   unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
    1480        1333 :                                               &ARM::DPRRegClass);
    1481             :   unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
    1482        1333 :                                               &ARM::DPRRegClass);
    1483        1333 :   if (!DstRegD || !SrcRegD)
    1484             :     return false;
    1485             : 
    1486             :   // We want to widen this into a DstRegD = VMOVD SrcRegD copy.  This is only
    1487             :   // legal if the COPY already defines the full DstRegD, and it isn't a
    1488             :   // sub-register insertion.
    1489         850 :   if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
    1490             :     return false;
    1491             : 
    1492             :   // A dead copy shouldn't show up here, but reject it just in case.
    1493          16 :   if (MI.getOperand(0).isDead())
    1494             :     return false;
    1495             : 
    1496             :   // All clear, widen the COPY.
    1497             :   LLVM_DEBUG(dbgs() << "widening:    " << MI);
    1498           8 :   MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    1499             : 
    1500             :   // Get rid of the old implicit-def of DstRegD.  Leave it if it defines a Q-reg
    1501             :   // or some other super-register.
    1502           8 :   int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
    1503           8 :   if (ImpDefIdx != -1)
    1504           1 :     MI.RemoveOperand(ImpDefIdx);
    1505             : 
    1506             :   // Change the opcode and operands.
    1507           8 :   MI.setDesc(get(ARM::VMOVD));
    1508           8 :   MI.getOperand(0).setReg(DstRegD);
    1509          16 :   MI.getOperand(1).setReg(SrcRegD);
    1510           8 :   MIB.add(predOps(ARMCC::AL));
    1511             : 
    1512             :   // We are now reading SrcRegD instead of SrcRegS.  This may upset the
    1513             :   // register scavenger and machine verifier, so we need to indicate that we
    1514             :   // are reading an undefined value from SrcRegD, but a proper value from
    1515             :   // SrcRegS.
    1516           8 :   MI.getOperand(1).setIsUndef();
    1517           8 :   MIB.addReg(SrcRegS, RegState::Implicit);
    1518             : 
    1519             :   // SrcRegD may actually contain an unrelated value in the ssub_1
    1520             :   // sub-register.  Don't kill it.  Only kill the ssub_0 sub-register.
    1521          16 :   if (MI.getOperand(1).isKill()) {
    1522             :     MI.getOperand(1).setIsKill(false);
    1523           7 :     MI.addRegisterKilled(SrcRegS, TRI, true);
    1524             :   }
    1525             : 
    1526             :   LLVM_DEBUG(dbgs() << "replaced by: " << MI);
    1527             :   return true;
    1528             : }
    1529             : 
    1530             : /// Create a copy of a const pool value. Update CPI to the new index and return
    1531             : /// the label UID.
    1532           0 : static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
    1533           0 :   MachineConstantPool *MCP = MF.getConstantPool();
    1534           0 :   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    1535             : 
    1536           0 :   const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
    1537             :   assert(MCPE.isMachineConstantPoolEntry() &&
    1538             :          "Expecting a machine constantpool entry!");
    1539           0 :   ARMConstantPoolValue *ACPV =
    1540             :     static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
    1541             : 
    1542             :   unsigned PCLabelId = AFI->createPICLabelUId();
    1543             :   ARMConstantPoolValue *NewCPV = nullptr;
    1544             : 
    1545             :   // FIXME: The below assumes PIC relocation model and that the function
    1546             :   // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
    1547             :   // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
    1548             :   // instructions, so that's probably OK, but is PIC always correct when
    1549             :   // we get here?
    1550           0 :   if (ACPV->isGlobalValue())
    1551           0 :     NewCPV = ARMConstantPoolConstant::Create(
    1552           0 :         cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
    1553           0 :         4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
    1554           0 :   else if (ACPV->isExtSymbol())
    1555           0 :     NewCPV = ARMConstantPoolSymbol::
    1556           0 :       Create(MF.getFunction().getContext(),
    1557             :              cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
    1558           0 :   else if (ACPV->isBlockAddress())
    1559           0 :     NewCPV = ARMConstantPoolConstant::
    1560           0 :       Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
    1561             :              ARMCP::CPBlockAddress, 4);
    1562           0 :   else if (ACPV->isLSDA())
    1563           0 :     NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
    1564             :                                              ARMCP::CPLSDA, 4);
    1565           0 :   else if (ACPV->isMachineBasicBlock())
    1566           0 :     NewCPV = ARMConstantPoolMBB::
    1567           0 :       Create(MF.getFunction().getContext(),
    1568             :              cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
    1569             :   else
    1570           0 :     llvm_unreachable("Unexpected ARM constantpool value type!!");
    1571           0 :   CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
    1572           0 :   return PCLabelId;
    1573             : }
    1574             : 
    1575        2830 : void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
    1576             :                                      MachineBasicBlock::iterator I,
    1577             :                                      unsigned DestReg, unsigned SubIdx,
    1578             :                                      const MachineInstr &Orig,
    1579             :                                      const TargetRegisterInfo &TRI) const {
    1580        2830 :   unsigned Opcode = Orig.getOpcode();
    1581        2830 :   switch (Opcode) {
    1582        2830 :   default: {
    1583        2830 :     MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
    1584        2830 :     MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
    1585             :     MBB.insert(I, MI);
    1586             :     break;
    1587             :   }
    1588           0 :   case ARM::tLDRpci_pic:
    1589             :   case ARM::t2LDRpci_pic: {
    1590           0 :     MachineFunction &MF = *MBB.getParent();
    1591           0 :     unsigned CPI = Orig.getOperand(1).getIndex();
    1592           0 :     unsigned PCLabelId = duplicateCPV(MF, CPI);
    1593             :     MachineInstrBuilder MIB =
    1594           0 :         BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
    1595             :             .addConstantPoolIndex(CPI)
    1596           0 :             .addImm(PCLabelId);
    1597           0 :     MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
    1598             :     break;
    1599             :   }
    1600             :   }
    1601        2830 : }
    1602             : 
    1603             : MachineInstr &
    1604         334 : ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
    1605             :     MachineBasicBlock::iterator InsertBefore,
    1606             :     const MachineInstr &Orig) const {
    1607         334 :   MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
    1608         334 :   MachineBasicBlock::instr_iterator I = Cloned.getIterator();
    1609             :   for (;;) {
    1610         676 :     switch (I->getOpcode()) {
    1611           0 :     case ARM::tLDRpci_pic:
    1612             :     case ARM::t2LDRpci_pic: {
    1613           0 :       MachineFunction &MF = *MBB.getParent();
    1614           0 :       unsigned CPI = I->getOperand(1).getIndex();
    1615           0 :       unsigned PCLabelId = duplicateCPV(MF, CPI);
    1616           0 :       I->getOperand(1).setIndex(CPI);
    1617           0 :       I->getOperand(2).setImm(PCLabelId);
    1618             :       break;
    1619             :     }
    1620             :     }
    1621         338 :     if (!I->isBundledWithSucc())
    1622             :       break;
    1623             :     ++I;
    1624             :   }
    1625         334 :   return Cloned;
    1626             : }
    1627             : 
    1628         432 : bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
    1629             :                                         const MachineInstr &MI1,
    1630             :                                         const MachineRegisterInfo *MRI) const {
    1631         432 :   unsigned Opcode = MI0.getOpcode();
    1632         864 :   if (Opcode == ARM::t2LDRpci ||
    1633         432 :       Opcode == ARM::t2LDRpci_pic ||
    1634         862 :       Opcode == ARM::tLDRpci ||
    1635         431 :       Opcode == ARM::tLDRpci_pic ||
    1636         425 :       Opcode == ARM::LDRLIT_ga_pcrel ||
    1637             :       Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
    1638         840 :       Opcode == ARM::tLDRLIT_ga_pcrel ||
    1639         420 :       Opcode == ARM::MOV_ga_pcrel ||
    1640         830 :       Opcode == ARM::MOV_ga_pcrel_ldr ||
    1641         415 :       Opcode == ARM::t2MOV_ga_pcrel) {
    1642         128 :     if (MI1.getOpcode() != Opcode)
    1643             :       return false;
    1644          64 :     if (MI0.getNumOperands() != MI1.getNumOperands())
    1645             :       return false;
    1646             : 
    1647          64 :     const MachineOperand &MO0 = MI0.getOperand(1);
    1648          64 :     const MachineOperand &MO1 = MI1.getOperand(1);
    1649         192 :     if (MO0.getOffset() != MO1.getOffset())
    1650             :       return false;
    1651             : 
    1652         128 :     if (Opcode == ARM::LDRLIT_ga_pcrel ||
    1653          64 :         Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
    1654          64 :         Opcode == ARM::tLDRLIT_ga_pcrel ||
    1655          54 :         Opcode == ARM::MOV_ga_pcrel ||
    1656          53 :         Opcode == ARM::MOV_ga_pcrel_ldr ||
    1657             :         Opcode == ARM::t2MOV_ga_pcrel)
    1658             :       // Ignore the PC labels.
    1659          57 :       return MO0.getGlobal() == MO1.getGlobal();
    1660             : 
    1661           7 :     const MachineFunction *MF = MI0.getParent()->getParent();
    1662           7 :     const MachineConstantPool *MCP = MF->getConstantPool();
    1663           7 :     int CPI0 = MO0.getIndex();
    1664           7 :     int CPI1 = MO1.getIndex();
    1665           7 :     const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    1666           7 :     const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    1667           7 :     bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    1668           7 :     bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    1669           7 :     if (isARMCP0 && isARMCP1) {
    1670           1 :       ARMConstantPoolValue *ACPV0 =
    1671             :         static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
    1672           1 :       ARMConstantPoolValue *ACPV1 =
    1673             :         static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
    1674           1 :       return ACPV0->hasSameValue(ACPV1);
    1675           6 :     } else if (!isARMCP0 && !isARMCP1) {
    1676           6 :       return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    1677             :     }
    1678             :     return false;
    1679         368 :   } else if (Opcode == ARM::PICLDR) {
    1680           0 :     if (MI1.getOpcode() != Opcode)
    1681             :       return false;
    1682           0 :     if (MI0.getNumOperands() != MI1.getNumOperands())
    1683             :       return false;
    1684             : 
    1685           0 :     unsigned Addr0 = MI0.getOperand(1).getReg();
    1686           0 :     unsigned Addr1 = MI1.getOperand(1).getReg();
    1687           0 :     if (Addr0 != Addr1) {
    1688           0 :       if (!MRI ||
    1689           0 :           !TargetRegisterInfo::isVirtualRegister(Addr0) ||
    1690             :           !TargetRegisterInfo::isVirtualRegister(Addr1))
    1691             :         return false;
    1692             : 
    1693             :       // This assumes SSA form.
    1694           0 :       MachineInstr *Def0 = MRI->getVRegDef(Addr0);
    1695           0 :       MachineInstr *Def1 = MRI->getVRegDef(Addr1);
    1696             :       // Check if the loaded value, e.g. a constantpool of a global address, are
    1697             :       // the same.
    1698           0 :       if (!produceSameValue(*Def0, *Def1, MRI))
    1699             :         return false;
    1700             :     }
    1701             : 
    1702           0 :     for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
    1703             :       // %12 = PICLDR %11, 0, 14, %noreg
    1704           0 :       const MachineOperand &MO0 = MI0.getOperand(i);
    1705           0 :       const MachineOperand &MO1 = MI1.getOperand(i);
    1706           0 :       if (!MO0.isIdenticalTo(MO1))
    1707             :         return false;
    1708             :     }
    1709             :     return true;
    1710             :   }
    1711             : 
    1712         368 :   return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
    1713             : }
    1714             : 
    1715             : /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
    1716             : /// determine if two loads are loading from the same base address. It should
    1717             : /// only return true if the base pointers are the same and the only differences
    1718             : /// between the two addresses is the offset. It also returns the offsets by
    1719             : /// reference.
    1720             : ///
    1721             : /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
    1722             : /// is permanently disabled.
    1723      146654 : bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
    1724             :                                                int64_t &Offset1,
    1725             :                                                int64_t &Offset2) const {
    1726             :   // Don't worry about Thumb: just ARM and Thumb2.
    1727      146654 :   if (Subtarget.isThumb1Only()) return false;
    1728             : 
    1729      127612 :   if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    1730             :     return false;
    1731             : 
    1732       91843 :   switch (Load1->getMachineOpcode()) {
    1733             :   default:
    1734             :     return false;
    1735             :   case ARM::LDRi12:
    1736             :   case ARM::LDRBi12:
    1737             :   case ARM::LDRD:
    1738             :   case ARM::LDRH:
    1739             :   case ARM::LDRSB:
    1740             :   case ARM::LDRSH:
    1741             :   case ARM::VLDRD:
    1742             :   case ARM::VLDRS:
    1743             :   case ARM::t2LDRi8:
    1744             :   case ARM::t2LDRBi8:
    1745             :   case ARM::t2LDRDi8:
    1746             :   case ARM::t2LDRSHi8:
    1747             :   case ARM::t2LDRi12:
    1748             :   case ARM::t2LDRBi12:
    1749             :   case ARM::t2LDRSHi12:
    1750             :     break;
    1751             :   }
    1752             : 
    1753       71384 :   switch (Load2->getMachineOpcode()) {
    1754             :   default:
    1755             :     return false;
    1756             :   case ARM::LDRi12:
    1757             :   case ARM::LDRBi12:
    1758             :   case ARM::LDRD:
    1759             :   case ARM::LDRH:
    1760             :   case ARM::LDRSB:
    1761             :   case ARM::LDRSH:
    1762             :   case ARM::VLDRD:
    1763             :   case ARM::VLDRS:
    1764             :   case ARM::t2LDRi8:
    1765             :   case ARM::t2LDRBi8:
    1766             :   case ARM::t2LDRSHi8:
    1767             :   case ARM::t2LDRi12:
    1768             :   case ARM::t2LDRBi12:
    1769             :   case ARM::t2LDRSHi12:
    1770             :     break;
    1771             :   }
    1772             : 
    1773             :   // Check if base addresses and chain operands match.
    1774       63942 :   if (Load1->getOperand(0) != Load2->getOperand(0) ||
    1775             :       Load1->getOperand(4) != Load2->getOperand(4))
    1776             :     return false;
    1777             : 
    1778             :   // Index should be Reg0.
    1779             :   if (Load1->getOperand(3) != Load2->getOperand(3))
    1780             :     return false;
    1781             : 
    1782             :   // Determine the offsets.
    1783             :   if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
    1784             :       isa<ConstantSDNode>(Load2->getOperand(1))) {
    1785       29600 :     Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    1786       44400 :     Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    1787       14800 :     return true;
    1788             :   }
    1789             : 
    1790             :   return false;
    1791             : }
    1792             : 
    1793             : /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
    1794             : /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
    1795             : /// be scheduled togther. On some targets if two loads are loading from
    1796             : /// addresses in the same cache line, it's better if they are scheduled
    1797             : /// together. This function takes two integers that represent the load offsets
    1798             : /// from the common base address. It returns true if it decides it's desirable
    1799             : /// to schedule the two loads together. "NumLoads" is the number of loads that
    1800             : /// have already been scheduled after Load1.
    1801             : ///
    1802             : /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
    1803             : /// is permanently disabled.
    1804        3172 : bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
    1805             :                                                int64_t Offset1, int64_t Offset2,
    1806             :                                                unsigned NumLoads) const {
    1807             :   // Don't worry about Thumb: just ARM and Thumb2.
    1808        3172 :   if (Subtarget.isThumb1Only()) return false;
    1809             : 
    1810             :   assert(Offset2 > Offset1);
    1811             : 
    1812        3172 :   if ((Offset2 - Offset1) / 8 > 64)
    1813             :     return false;
    1814             : 
    1815             :   // Check if the machine opcodes are different. If they are different
    1816             :   // then we consider them to not be of the same base address,
    1817             :   // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
    1818             :   // In this case, they are considered to be the same because they are different
    1819             :   // encoding forms of the same basic instruction.
    1820        3172 :   if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
    1821           3 :       !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
    1822             :          Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
    1823           2 :         (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
    1824             :          Load2->getMachineOpcode() == ARM::t2LDRBi8)))
    1825             :     return false;  // FIXME: overly conservative?
    1826             : 
    1827             :   // Four loads in a row should be sufficient.
    1828        3151 :   if (NumLoads >= 3)
    1829             :     return false;
    1830             : 
    1831        2665 :   return true;
    1832             : }
    1833             : 
    1834       95940 : bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
    1835             :                                             const MachineBasicBlock *MBB,
    1836             :                                             const MachineFunction &MF) const {
    1837             :   // Debug info is never a scheduling boundary. It's necessary to be explicit
    1838             :   // due to the special treatment of IT instructions below, otherwise a
    1839             :   // dbg_value followed by an IT will result in the IT instruction being
    1840             :   // considered a scheduling hazard, which is wrong. It should be the actual
    1841             :   // instruction preceding the dbg_value instruction(s), just like it is
    1842             :   // when debug info is not present.
    1843             :   if (MI.isDebugInstr())
    1844             :     return false;
    1845             : 
    1846             :   // Terminators and labels can't be scheduled around.
    1847       95814 :   if (MI.isTerminator() || MI.isPosition())
    1848             :     return true;
    1849             : 
    1850             :   // Treat the start of the IT block as a scheduling boundary, but schedule
    1851             :   // t2IT along with all instructions following it.
    1852             :   // FIXME: This is a big hammer. But the alternative is to add all potential
    1853             :   // true and anti dependencies to IT block instructions as implicit operands
    1854             :   // to the t2IT instruction. The added compile time and complexity does not
    1855             :   // seem worth it.
    1856             :   MachineBasicBlock::const_iterator I = MI;
    1857             :   // Make sure to skip any debug instructions
    1858       70109 :   while (++I != MBB->end() && I->isDebugInstr())
    1859             :     ;
    1860      139311 :   if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
    1861             :     return true;
    1862             : 
    1863             :   // Don't attempt to schedule around any instruction that defines
    1864             :   // a stack-oriented pointer, as it's unlikely to be profitable. This
    1865             :   // saves compile time, because it doesn't require every single
    1866             :   // stack slot reference to depend on the instruction that does the
    1867             :   // modification.
    1868             :   // Calls don't actually change the stack pointer, even if they have imp-defs.
    1869             :   // No ARM calling conventions change the stack pointer. (X86 calling
    1870             :   // conventions sometimes do).
    1871      140122 :   if (!MI.isCall() && MI.definesRegister(ARM::SP))
    1872             :     return true;
    1873             : 
    1874             :   return false;
    1875             : }
    1876             : 
    1877        1376 : bool ARMBaseInstrInfo::
    1878             : isProfitableToIfCvt(MachineBasicBlock &MBB,
    1879             :                     unsigned NumCycles, unsigned ExtraPredCycles,
    1880             :                     BranchProbability Probability) const {
    1881        1376 :   if (!NumCycles)
    1882             :     return false;
    1883             : 
    1884             :   // If we are optimizing for size, see if the branch in the predecessor can be
    1885             :   // lowered to cbn?z by the constant island lowering pass, and return false if
    1886             :   // so. This results in a shorter instruction sequence.
    1887        1376 :   if (MBB.getParent()->getFunction().optForSize()) {
    1888         127 :     MachineBasicBlock *Pred = *MBB.pred_begin();
    1889         127 :     if (!Pred->empty()) {
    1890             :       MachineInstr *LastMI = &*Pred->rbegin();
    1891         254 :       if (LastMI->getOpcode() == ARM::t2Bcc) {
    1892             :         MachineBasicBlock::iterator CmpMI = LastMI;
    1893         118 :         if (CmpMI != Pred->begin()) {
    1894             :           --CmpMI;
    1895         236 :           if (CmpMI->getOpcode() == ARM::tCMPi8 ||
    1896             :               CmpMI->getOpcode() == ARM::t2CMPri) {
    1897          85 :             unsigned Reg = CmpMI->getOperand(0).getReg();
    1898          85 :             unsigned PredReg = 0;
    1899          85 :             ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
    1900         168 :             if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
    1901             :                 isARMLowRegister(Reg))
    1902          43 :               return false;
    1903             :           }
    1904             :         }
    1905             :       }
    1906             :     }
    1907             :   }
    1908             :   return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
    1909        1333 :                              MBB, 0, 0, Probability);
    1910             : }
    1911             : 
    1912        1417 : bool ARMBaseInstrInfo::
    1913             : isProfitableToIfCvt(MachineBasicBlock &TBB,
    1914             :                     unsigned TCycles, unsigned TExtra,
    1915             :                     MachineBasicBlock &FBB,
    1916             :                     unsigned FCycles, unsigned FExtra,
    1917             :                     BranchProbability Probability) const {
    1918        1417 :   if (!TCycles)
    1919             :     return false;
    1920             : 
    1921             :   // Attempt to estimate the relative costs of predication versus branching.
    1922             :   // Here we scale up each component of UnpredCost to avoid precision issue when
    1923             :   // scaling TCycles/FCycles by Probability.
    1924             :   const unsigned ScalingUpFactor = 1024;
    1925             : 
    1926        1417 :   unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
    1927             :   unsigned UnpredCost;
    1928        1417 :   if (!Subtarget.hasBranchPredictor()) {
    1929             :     // When we don't have a branch predictor it's always cheaper to not take a
    1930             :     // branch than take it, so we have to take that into account.
    1931             :     unsigned NotTakenBranchCost = 1;
    1932          10 :     unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
    1933             :     unsigned TUnpredCycles, FUnpredCycles;
    1934          10 :     if (!FCycles) {
    1935             :       // Triangle: TBB is the fallthrough
    1936           7 :       TUnpredCycles = TCycles + NotTakenBranchCost;
    1937             :       FUnpredCycles = TakenBranchCost;
    1938             :     } else {
    1939             :       // Diamond: TBB is the block that is branched to, FBB is the fallthrough
    1940           3 :       TUnpredCycles = TCycles + TakenBranchCost;
    1941           3 :       FUnpredCycles = FCycles + NotTakenBranchCost;
    1942             :       // The branch at the end of FBB will disappear when it's predicated, so
    1943             :       // discount it from PredCost.
    1944           3 :       PredCost -= 1 * ScalingUpFactor;
    1945             :     }
    1946             :     // The total cost is the cost of each path scaled by their probabilites
    1947          10 :     unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
    1948          10 :     unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
    1949          10 :     UnpredCost = TUnpredCost + FUnpredCost;
    1950             :     // When predicating assume that the first IT can be folded away but later
    1951             :     // ones cost one cycle each
    1952          20 :     if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
    1953           4 :       PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
    1954             :     }
    1955             :   } else {
    1956        1407 :     unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
    1957             :     unsigned FUnpredCost =
    1958        1407 :       Probability.getCompl().scale(FCycles * ScalingUpFactor);
    1959        1407 :     UnpredCost = TUnpredCost + FUnpredCost;
    1960        1407 :     UnpredCost += 1 * ScalingUpFactor; // The branch itself
    1961        1407 :     UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
    1962             :   }
    1963             : 
    1964        1417 :   return PredCost <= UnpredCost;
    1965             : }
    1966             : 
    1967             : bool
    1968          55 : ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
    1969             :                                             MachineBasicBlock &FMBB) const {
    1970             :   // Reduce false anti-dependencies to let the target's out-of-order execution
    1971             :   // engine do its thing.
    1972          55 :   return Subtarget.isProfitableToUnpredicate();
    1973             : }
    1974             : 
    1975             : /// getInstrPredicate - If instruction is predicated, returns its predicate
    1976             : /// condition, otherwise returns AL. It also returns the condition code
    1977             : /// register by reference.
    1978       87798 : ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
    1979             :                                          unsigned &PredReg) {
    1980       87798 :   int PIdx = MI.findFirstPredOperandIdx();
    1981       87798 :   if (PIdx == -1) {
    1982       13636 :     PredReg = 0;
    1983       13636 :     return ARMCC::AL;
    1984             :   }
    1985             : 
    1986      148324 :   PredReg = MI.getOperand(PIdx+1).getReg();
    1987      148324 :   return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
    1988             : }
    1989             : 
    1990           0 : unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
    1991           0 :   if (Opc == ARM::B)
    1992             :     return ARM::Bcc;
    1993           0 :   if (Opc == ARM::tB)
    1994             :     return ARM::tBcc;
    1995           0 :   if (Opc == ARM::t2B)
    1996             :     return ARM::t2Bcc;
    1997             : 
    1998           0 :   llvm_unreachable("Unknown unconditional branch opcode!");
    1999             : }
    2000             : 
    2001        9991 : MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
    2002             :                                                        bool NewMI,
    2003             :                                                        unsigned OpIdx1,
    2004             :                                                        unsigned OpIdx2) const {
    2005       19982 :   switch (MI.getOpcode()) {
    2006         844 :   case ARM::MOVCCr:
    2007             :   case ARM::t2MOVCCr: {
    2008             :     // MOVCC can be commuted by inverting the condition.
    2009         844 :     unsigned PredReg = 0;
    2010         844 :     ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
    2011             :     // MOVCC AL can't be inverted. Shouldn't happen.
    2012         844 :     if (CC == ARMCC::AL || PredReg != ARM::CPSR)
    2013             :       return nullptr;
    2014             :     MachineInstr *CommutedMI =
    2015         844 :         TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
    2016         844 :     if (!CommutedMI)
    2017             :       return nullptr;
    2018             :     // After swapping the MOVCC operands, also invert the condition.
    2019         844 :     CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
    2020         844 :         .setImm(ARMCC::getOppositeCondition(CC));
    2021         844 :     return CommutedMI;
    2022             :   }
    2023             :   }
    2024        9147 :   return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
    2025             : }
    2026             : 
    2027             : /// Identify instructions that can be folded into a MOVCC instruction, and
    2028             : /// return the defining instruction.
    2029         731 : static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
    2030             :                                       const MachineRegisterInfo &MRI,
    2031             :                                       const TargetInstrInfo *TII) {
    2032         731 :   if (!TargetRegisterInfo::isVirtualRegister(Reg))
    2033             :     return nullptr;
    2034         731 :   if (!MRI.hasOneNonDBGUse(Reg))
    2035             :     return nullptr;
    2036         304 :   MachineInstr *MI = MRI.getVRegDef(Reg);
    2037         304 :   if (!MI)
    2038             :     return nullptr;
    2039             :   // MI is folded into the MOVCC by predicating it.
    2040         304 :   if (!MI->isPredicable())
    2041             :     return nullptr;
    2042             :   // Check if MI has any non-dead defs or physreg uses. This also detects
    2043             :   // predicated instructions which will be reading CPSR.
    2044         912 :   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    2045         780 :     const MachineOperand &MO = MI->getOperand(i);
    2046             :     // Reject frame index operands, PEI can't handle the predicated pseudos.
    2047         780 :     if (MO.isFI() || MO.isCPI() || MO.isJTI())
    2048             :       return nullptr;
    2049         733 :     if (!MO.isReg())
    2050             :       continue;
    2051             :     // MI can't have any tied operands, that would conflict with predication.
    2052         470 :     if (MO.isTied())
    2053             :       return nullptr;
    2054         880 :     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
    2055             :       return nullptr;
    2056         429 :     if (MO.isDef() && !MO.isDead())
    2057             :       return nullptr;
    2058             :   }
    2059         132 :   bool DontMoveAcrossStores = true;
    2060         132 :   if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
    2061             :     return nullptr;
    2062             :   return MI;
    2063             : }
    2064             : 
    2065         394 : bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
    2066             :                                      SmallVectorImpl<MachineOperand> &Cond,
    2067             :                                      unsigned &TrueOp, unsigned &FalseOp,
    2068             :                                      bool &Optimizable) const {
    2069             :   assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
    2070             :          "Unknown select instruction");
    2071             :   // MOVCC operands:
    2072             :   // 0: Def.
    2073             :   // 1: True use.
    2074             :   // 2: False use.
    2075             :   // 3: Condition code.
    2076             :   // 4: CPSR use.
    2077         394 :   TrueOp = 1;
    2078         394 :   FalseOp = 2;
    2079         788 :   Cond.push_back(MI.getOperand(3));
    2080         788 :   Cond.push_back(MI.getOperand(4));
    2081             :   // We can always fold a def.
    2082         394 :   Optimizable = true;
    2083         394 :   return false;
    2084             : }
    2085             : 
    2086             : MachineInstr *
    2087         394 : ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
    2088             :                                  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
    2089             :                                  bool PreferFalse) const {
    2090             :   assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
    2091             :          "Unknown select instruction");
    2092         394 :   MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
    2093         394 :   MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
    2094             :   bool Invert = !DefMI;
    2095         394 :   if (!DefMI)
    2096         337 :     DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
    2097         394 :   if (!DefMI)
    2098             :     return nullptr;
    2099             : 
    2100             :   // Find new register class to use.
    2101         264 :   MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
    2102         132 :   unsigned DestReg = MI.getOperand(0).getReg();
    2103         132 :   const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
    2104         132 :   if (!MRI.constrainRegClass(DestReg, PreviousClass))
    2105             :     return nullptr;
    2106             : 
    2107             :   // Create a new predicated version of DefMI.
    2108             :   // Rfalse is the first use.
    2109             :   MachineInstrBuilder NewMI =
    2110         264 :       BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
    2111             : 
    2112             :   // Copy all the DefMI operands, excluding its (null) predicate.
    2113         132 :   const MCInstrDesc &DefDesc = DefMI->getDesc();
    2114         368 :   for (unsigned i = 1, e = DefDesc.getNumOperands();
    2115         368 :        i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
    2116         236 :     NewMI.add(DefMI->getOperand(i));
    2117             : 
    2118         132 :   unsigned CondCode = MI.getOperand(3).getImm();
    2119         132 :   if (Invert)
    2120          75 :     NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
    2121             :   else
    2122             :     NewMI.addImm(CondCode);
    2123         132 :   NewMI.add(MI.getOperand(4));
    2124             : 
    2125             :   // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
    2126         132 :   if (NewMI->hasOptionalDef())
    2127         124 :     NewMI.add(condCodeOp());
    2128             : 
    2129             :   // The output register value when the predicate is false is an implicit
    2130             :   // register operand tied to the first def.
    2131             :   // The tie makes the register allocator ensure the FalseReg is allocated the
    2132             :   // same register as operand 0.
    2133             :   FalseReg.setImplicit();
    2134             :   NewMI.add(FalseReg);
    2135         132 :   NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
    2136             : 
    2137             :   // Update SeenMIs set: register newly created MI and erase removed DefMI.
    2138         132 :   SeenMIs.insert(NewMI);
    2139             :   SeenMIs.erase(DefMI);
    2140             : 
    2141             :   // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
    2142             :   // DefMI would be invalid when tranferred inside the loop.  Checking for a
    2143             :   // loop is expensive, but at least remove kill flags if they are in different
    2144             :   // BBs.
    2145         132 :   if (DefMI->getParent() != MI.getParent())
    2146          17 :     NewMI->clearKillInfo();
    2147             : 
    2148             :   // The caller will erase MI, but not DefMI.
    2149         132 :   DefMI->eraseFromParent();
    2150         132 :   return NewMI;
    2151             : }
    2152             : 
    2153             : /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
    2154             : /// instruction is encoded with an 'S' bit is determined by the optional CPSR
    2155             : /// def operand.
    2156             : ///
    2157             : /// This will go away once we can teach tblgen how to set the optional CPSR def
    2158             : /// operand itself.
    2159             : struct AddSubFlagsOpcodePair {
    2160             :   uint16_t PseudoOpc;
    2161             :   uint16_t MachineOpc;
    2162             : };
    2163             : 
    2164             : static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
    2165             :   {ARM::ADDSri, ARM::ADDri},
    2166             :   {ARM::ADDSrr, ARM::ADDrr},
    2167             :   {ARM::ADDSrsi, ARM::ADDrsi},
    2168             :   {ARM::ADDSrsr, ARM::ADDrsr},
    2169             : 
    2170             :   {ARM::SUBSri, ARM::SUBri},
    2171             :   {ARM::SUBSrr, ARM::SUBrr},
    2172             :   {ARM::SUBSrsi, ARM::SUBrsi},
    2173             :   {ARM::SUBSrsr, ARM::SUBrsr},
    2174             : 
    2175             :   {ARM::RSBSri, ARM::RSBri},
    2176             :   {ARM::RSBSrsi, ARM::RSBrsi},
    2177             :   {ARM::RSBSrsr, ARM::RSBrsr},
    2178             : 
    2179             :   {ARM::tADDSi3, ARM::tADDi3},
    2180             :   {ARM::tADDSi8, ARM::tADDi8},
    2181             :   {ARM::tADDSrr, ARM::tADDrr},
    2182             :   {ARM::tADCS, ARM::tADC},
    2183             : 
    2184             :   {ARM::tSUBSi3, ARM::tSUBi3},
    2185             :   {ARM::tSUBSi8, ARM::tSUBi8},
    2186             :   {ARM::tSUBSrr, ARM::tSUBrr},
    2187             :   {ARM::tSBCS, ARM::tSBC},
    2188             : 
    2189             :   {ARM::t2ADDSri, ARM::t2ADDri},
    2190             :   {ARM::t2ADDSrr, ARM::t2ADDrr},
    2191             :   {ARM::t2ADDSrs, ARM::t2ADDrs},
    2192             : 
    2193             :   {ARM::t2SUBSri, ARM::t2SUBri},
    2194             :   {ARM::t2SUBSrr, ARM::t2SUBrr},
    2195             :   {ARM::t2SUBSrs, ARM::t2SUBrs},
    2196             : 
    2197             :   {ARM::t2RSBSri, ARM::t2RSBri},
    2198             :   {ARM::t2RSBSrs, ARM::t2RSBrs},
    2199             : };
    2200             : 
    2201     1297145 : unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
    2202    71323737 :   for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
    2203    35013919 :     if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
    2204         623 :       return AddSubFlagsOpcodeMap[i].MachineOpc;
    2205             :   return 0;
    2206             : }
    2207             : 
    2208        2917 : void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
    2209             :                                    MachineBasicBlock::iterator &MBBI,
    2210             :                                    const DebugLoc &dl, unsigned DestReg,
    2211             :                                    unsigned BaseReg, int NumBytes,
    2212             :                                    ARMCC::CondCodes Pred, unsigned PredReg,
    2213             :                                    const ARMBaseInstrInfo &TII,
    2214             :                                    unsigned MIFlags) {
    2215        2917 :   if (NumBytes == 0 && DestReg != BaseReg) {
    2216         912 :     BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
    2217         304 :         .addReg(BaseReg, RegState::Kill)
    2218         304 :         .add(predOps(Pred, PredReg))
    2219         304 :         .add(condCodeOp())
    2220             :         .setMIFlags(MIFlags);
    2221         304 :     return;
    2222             :   }
    2223             : 
    2224             :   bool isSub = NumBytes < 0;
    2225        2613 :   if (isSub) NumBytes = -NumBytes;
    2226             : 
    2227        7951 :   while (NumBytes) {
    2228        2669 :     unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    2229        2669 :     unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
    2230             :     assert(ThisVal && "Didn't extract field correctly");
    2231             : 
    2232             :     // We will handle these bits from offset, clear them.
    2233        2669 :     NumBytes &= ~ThisVal;
    2234             : 
    2235             :     assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
    2236             : 
    2237             :     // Build the new ADD / SUB.
    2238        2669 :     unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
    2239        8007 :     BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
    2240        2669 :         .addReg(BaseReg, RegState::Kill)
    2241        2669 :         .addImm(ThisVal)
    2242        2669 :         .add(predOps(Pred, PredReg))
    2243        2669 :         .add(condCodeOp())
    2244             :         .setMIFlags(MIFlags);
    2245             :     BaseReg = DestReg;
    2246             :   }
    2247             : }
    2248             : 
    2249        3012 : bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
    2250             :                                       MachineFunction &MF, MachineInstr *MI,
    2251             :                                       unsigned NumBytes) {
    2252             :   // This optimisation potentially adds lots of load and store
    2253             :   // micro-operations, it's only really a great benefit to code-size.
    2254        6024 :   if (!MF.getFunction().optForMinSize())
    2255             :     return false;
    2256             : 
    2257             :   // If only one register is pushed/popped, LLVM can use an LDR/STR
    2258             :   // instead. We can't modify those so make sure we're dealing with an
    2259             :   // instruction we understand.
    2260         170 :   bool IsPop = isPopOpcode(MI->getOpcode());
    2261             :   bool IsPush = isPushOpcode(MI->getOpcode());
    2262          85 :   if (!IsPush && !IsPop)
    2263             :     return false;
    2264             : 
    2265          81 :   bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
    2266             :                       MI->getOpcode() == ARM::VLDMDIA_UPD;
    2267          70 :   bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
    2268         149 :                      MI->getOpcode() == ARM::tPOP ||
    2269             :                      MI->getOpcode() == ARM::tPOP_RET;
    2270             : 
    2271             :   assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
    2272             :                           MI->getOperand(1).getReg() == ARM::SP)) &&
    2273             :          "trying to fold sp update into non-sp-updating push/pop");
    2274             : 
    2275             :   // The VFP push & pop act on D-registers, so we can only fold an adjustment
    2276             :   // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
    2277             :   // if this is violated.
    2278          81 :   if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
    2279             :     return false;
    2280             : 
    2281             :   // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
    2282             :   // pred) so the list starts at 4. Thumb1 starts after the predicate.
    2283          79 :   int RegListIdx = IsT1PushPop ? 2 : 4;
    2284             : 
    2285             :   // Calculate the space we'll need in terms of registers.
    2286             :   unsigned RegsNeeded;
    2287             :   const TargetRegisterClass *RegClass;
    2288          79 :   if (IsVFPPushPop) {
    2289          18 :     RegsNeeded = NumBytes / 8;
    2290             :     RegClass = &ARM::DPRRegClass;
    2291             :   } else {
    2292          61 :     RegsNeeded = NumBytes / 4;
    2293             :     RegClass = &ARM::GPRRegClass;
    2294             :   }
    2295             : 
    2296             :   // We're going to have to strip all list operands off before
    2297             :   // re-adding them since the order matters, so save the existing ones
    2298             :   // for later.
    2299             :   SmallVector<MachineOperand, 4> RegList;
    2300             : 
    2301             :   // We're also going to need the first register transferred by this
    2302             :   // instruction, which won't necessarily be the first register in the list.
    2303             :   unsigned FirstRegEnc = -1;
    2304             : 
    2305          79 :   const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
    2306         328 :   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
    2307         249 :     MachineOperand &MO = MI->getOperand(i);
    2308         249 :     RegList.push_back(MO);
    2309             : 
    2310         498 :     if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
    2311             :       FirstRegEnc = TRI->getEncodingValue(MO.getReg());
    2312             :   }
    2313             : 
    2314          79 :   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
    2315             : 
    2316             :   // Now try to find enough space in the reglist to allocate NumBytes.
    2317         392 :   for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
    2318             :        --CurRegEnc) {
    2319         317 :     unsigned CurReg = RegClass->getRegister(CurRegEnc);
    2320         473 :     if (!IsPop) {
    2321             :       // Pushing any register is completely harmless, mark the register involved
    2322             :       // as undef since we don't care about its value and must not restore it
    2323             :       // during stack unwinding.
    2324         156 :       RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
    2325             :                                                   false, false, true));
    2326         156 :       --RegsNeeded;
    2327         156 :       continue;
    2328             :     }
    2329             : 
    2330             :     // However, we can only pop an extra register if it's not live. For
    2331             :     // registers live within the function we might clobber a return value
    2332             :     // register; the other way a register can be live here is if it's
    2333             :     // callee-saved.
    2334         267 :     if (isCalleeSavedRegister(CurReg, CSRegs) ||
    2335         373 :         MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
    2336             :         MachineBasicBlock::LQR_Dead) {
    2337             :       // VFP pops don't allow holes in the register list, so any skip is fatal
    2338             :       // for our transformation. GPR pops do, so we should just keep looking.
    2339          59 :       if (IsVFPPushPop)
    2340             :         return false;
    2341             :       else
    2342          55 :         continue;
    2343             :     }
    2344             : 
    2345             :     // Mark the unimportant registers as <def,dead> in the POP.
    2346         102 :     RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
    2347             :                                                 true));
    2348         102 :     --RegsNeeded;
    2349             :   }
    2350             : 
    2351          75 :   if (RegsNeeded > 0)
    2352             :     return false;
    2353             : 
    2354             :   // Finally we know we can profitably perform the optimisation so go
    2355             :   // ahead: strip all existing registers off and add them back again
    2356             :   // in the right order.
    2357         203 :   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
    2358         152 :     MI->RemoveOperand(i);
    2359             : 
    2360             :   // Add the complete list back in.
    2361             :   MachineInstrBuilder MIB(MF, &*MI);
    2362         354 :   for (int i = RegList.size() - 1; i >= 0; --i)
    2363         303 :     MIB.add(RegList[i]);
    2364             : 
    2365             :   return true;
    2366             : }
    2367             : 
    2368        7940 : bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
    2369             :                                 unsigned FrameReg, int &Offset,
    2370             :                                 const ARMBaseInstrInfo &TII) {
    2371        7940 :   unsigned Opcode = MI.getOpcode();
    2372             :   const MCInstrDesc &Desc = MI.getDesc();
    2373        7940 :   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
    2374             :   bool isSub = false;
    2375             : 
    2376             :   // Memory operands in inline assembly always use AddrMode2.
    2377        7940 :   if (Opcode == ARM::INLINEASM)
    2378             :     AddrMode = ARMII::AddrMode2;
    2379             : 
    2380        7940 :   if (Opcode == ARM::ADDri) {
    2381        2508 :     Offset += MI.getOperand(FrameRegIdx+1).getImm();
    2382        1254 :     if (Offset == 0) {
    2383             :       // Turn it into a move.
    2384         159 :       MI.setDesc(TII.get(ARM::MOVr));
    2385         159 :       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    2386         159 :       MI.RemoveOperand(FrameRegIdx+1);
    2387         159 :       Offset = 0;
    2388         159 :       return true;
    2389        1095 :     } else if (Offset < 0) {
    2390          51 :       Offset = -Offset;
    2391             :       isSub = true;
    2392          51 :       MI.setDesc(TII.get(ARM::SUBri));
    2393             :     }
    2394             : 
    2395             :     // Common case: small offset, fits into instruction.
    2396        1095 :     if (ARM_AM::getSOImmVal(Offset) != -1) {
    2397             :       // Replace the FrameIndex with sp / fp
    2398         773 :       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    2399        1546 :       MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
    2400         773 :       Offset = 0;
    2401         773 :       return true;
    2402             :     }
    2403             : 
    2404             :     // Otherwise, pull as much of the immedidate into this ADDri/SUBri
    2405             :     // as possible.
    2406         322 :     unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    2407         322 :     unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
    2408             : 
    2409             :     // We will handle these bits from offset, clear them.
    2410         322 :     Offset &= ~ThisImmVal;
    2411             : 
    2412             :     // Get the properly encoded SOImmVal field.
    2413             :     assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
    2414             :            "Bit extraction didn't work?");
    2415         322 :     MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
    2416             :  } else {
    2417             :     unsigned ImmIdx = 0;
    2418             :     int InstrOffs = 0;
    2419             :     unsigned NumBits = 0;
    2420             :     unsigned Scale = 1;
    2421        6686 :     switch (AddrMode) {
    2422        5955 :     case ARMII::AddrMode_i12:
    2423        5955 :       ImmIdx = FrameRegIdx + 1;
    2424       11910 :       InstrOffs = MI.getOperand(ImmIdx).getImm();
    2425             :       NumBits = 12;
    2426        5955 :       break;
    2427           0 :     case ARMII::AddrMode2:
    2428           0 :       ImmIdx = FrameRegIdx+2;
    2429           0 :       InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
    2430             :       if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
    2431           0 :         InstrOffs *= -1;
    2432             :       NumBits = 12;
    2433             :       break;
    2434          70 :     case ARMII::AddrMode3:
    2435          70 :       ImmIdx = FrameRegIdx+2;
    2436         210 :       InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
    2437             :       if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
    2438           0 :         InstrOffs *= -1;
    2439             :       NumBits = 8;
    2440             :       break;
    2441             :     case ARMII::AddrMode4:
    2442             :     case ARMII::AddrMode6:
    2443             :       // Can't fold any offset even if it's zero.
    2444             :       return false;
    2445         610 :     case ARMII::AddrMode5:
    2446         610 :       ImmIdx = FrameRegIdx+1;
    2447        1830 :       InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
    2448             :       if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
    2449           0 :         InstrOffs *= -1;
    2450             :       NumBits = 8;
    2451             :       Scale = 4;
    2452             :       break;
    2453          22 :     case ARMII::AddrMode5FP16:
    2454          22 :       ImmIdx = FrameRegIdx+1;
    2455          66 :       InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
    2456             :       if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
    2457           0 :         InstrOffs *= -1;
    2458             :       NumBits = 8;
    2459             :       Scale = 2;
    2460             :       break;
    2461           0 :     default:
    2462           0 :       llvm_unreachable("Unsupported addressing mode!");
    2463             :     }
    2464             : 
    2465        6657 :     Offset += InstrOffs * Scale;
    2466             :     assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    2467        6657 :     if (Offset < 0) {
    2468         215 :       Offset = -Offset;
    2469             :       isSub = true;
    2470             :     }
    2471             : 
    2472             :     // Attempt to fold address comp. if opcode has offset bits
    2473             :     if (NumBits > 0) {
    2474             :       // Common case: small offset, fits into instruction.
    2475        6657 :       MachineOperand &ImmOp = MI.getOperand(ImmIdx);
    2476        6657 :       int ImmedOffset = Offset / Scale;
    2477        6657 :       unsigned Mask = (1 << NumBits) - 1;
    2478        6657 :       if ((unsigned)Offset <= Mask * Scale) {
    2479             :         // Replace the FrameIndex with sp
    2480        6611 :         MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    2481             :         // FIXME: When addrmode2 goes away, this will simplify (like the
    2482             :         // T2 version), as the LDR.i12 versions don't need the encoding
    2483             :         // tricks for the offset value.
    2484        6611 :         if (isSub) {
    2485         215 :           if (AddrMode == ARMII::AddrMode_i12)
    2486         169 :             ImmedOffset = -ImmedOffset;
    2487             :           else
    2488          46 :             ImmedOffset |= 1 << NumBits;
    2489             :         }
    2490        6611 :         ImmOp.ChangeToImmediate(ImmedOffset);
    2491        6611 :         Offset = 0;
    2492        6611 :         return true;
    2493             :       }
    2494             : 
    2495             :       // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
    2496          46 :       ImmedOffset = ImmedOffset & Mask;
    2497          46 :       if (isSub) {
    2498           0 :         if (AddrMode == ARMII::AddrMode_i12)
    2499           0 :           ImmedOffset = -ImmedOffset;
    2500             :         else
    2501           0 :           ImmedOffset |= 1 << NumBits;
    2502             :       }
    2503          46 :       ImmOp.ChangeToImmediate(ImmedOffset);
    2504          46 :       Offset &= ~(Mask*Scale);
    2505             :     }
    2506             :   }
    2507             : 
    2508         368 :   Offset = (isSub) ? -Offset : Offset;
    2509         368 :   return Offset == 0;
    2510             : }
    2511             : 
    2512             : /// analyzeCompare - For a comparison instruction, return the source registers
    2513             : /// in SrcReg and SrcReg2 if having two register operands, and the value it
    2514             : /// compares against in CmpValue. Return true if the comparison instruction
    2515             : /// can be analyzed.
    2516       35596 : bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
    2517             :                                       unsigned &SrcReg2, int &CmpMask,
    2518             :                                       int &CmpValue) const {
    2519       71192 :   switch (MI.getOpcode()) {
    2520             :   default: break;
    2521        4598 :   case ARM::CMPri:
    2522             :   case ARM::t2CMPri:
    2523             :   case ARM::tCMPi8:
    2524        4598 :     SrcReg = MI.getOperand(0).getReg();
    2525        4598 :     SrcReg2 = 0;
    2526        4598 :     CmpMask = ~0;
    2527        4598 :     CmpValue = MI.getOperand(1).getImm();
    2528        4598 :     return true;
    2529         770 :   case ARM::CMPrr:
    2530             :   case ARM::t2CMPrr:
    2531         770 :     SrcReg = MI.getOperand(0).getReg();
    2532         770 :     SrcReg2 = MI.getOperand(1).getReg();
    2533         770 :     CmpMask = ~0;
    2534         770 :     CmpValue = 0;
    2535         770 :     return true;
    2536         265 :   case ARM::TSTri:
    2537             :   case ARM::t2TSTri:
    2538         265 :     SrcReg = MI.getOperand(0).getReg();
    2539         265 :     SrcReg2 = 0;
    2540         265 :     CmpMask = MI.getOperand(1).getImm();
    2541         265 :     CmpValue = 0;
    2542         265 :     return true;
    2543             :   }
    2544             : 
    2545             :   return false;
    2546             : }
    2547             : 
    2548             : /// isSuitableForMask - Identify a suitable 'and' instruction that
    2549             : /// operates on the given source register and applies the same mask
    2550             : /// as a 'tst' instruction. Provide a limited look-through for copies.
    2551             : /// When successful, MI will hold the found instruction.
    2552             : static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
    2553             :                               int CmpMask, bool CommonUse) {
    2554         188 :   switch (MI->getOpcode()) {
    2555           0 :     case ARM::ANDri:
    2556             :     case ARM::t2ANDri:
    2557           0 :       if (CmpMask != MI->getOperand(2).getImm())
    2558             :         return false;
    2559           0 :       if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
    2560             :         return true;
    2561             :       break;
    2562             :   }
    2563             : 
    2564             :   return false;
    2565             : }
    2566             : 
    2567             : /// getSwappedCondition - assume the flags are set by MI(a,b), return
    2568             : /// the condition code if we modify the instructions such that flags are
    2569             : /// set by MI(b,a).
    2570             : inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
    2571             :   switch (CC) {
    2572             :   default: return ARMCC::AL;
    2573             :   case ARMCC::EQ: return ARMCC::EQ;
    2574             :   case ARMCC::NE: return ARMCC::NE;
    2575             :   case ARMCC::HS: return ARMCC::LS;
    2576             :   case ARMCC::LO: return ARMCC::HI;
    2577             :   case ARMCC::HI: return ARMCC::LO;
    2578             :   case ARMCC::LS: return ARMCC::HS;
    2579             :   case ARMCC::GE: return ARMCC::LE;
    2580             :   case ARMCC::LT: return ARMCC::GT;
    2581             :   case ARMCC::GT: return ARMCC::LT;
    2582             :   case ARMCC::LE: return ARMCC::GE;
    2583             :   }
    2584             : }
    2585             : 
    2586             : /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
    2587             : /// the condition code if we modify the instructions such that flags are
    2588             : /// set by ADD(a,b,X).
    2589             : inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
    2590             :   switch (CC) {
    2591             :   default: return ARMCC::AL;
    2592             :   case ARMCC::HS: return ARMCC::LO;
    2593             :   case ARMCC::LO: return ARMCC::HS;
    2594             :   case ARMCC::VS: return ARMCC::VS;
    2595             :   case ARMCC::VC: return ARMCC::VC;
    2596             :   }
    2597             : }
    2598             : 
    2599             : /// isRedundantFlagInstr - check whether the first instruction, whose only
    2600             : /// purpose is to update flags, can be made redundant.
    2601             : /// CMPrr can be made redundant by SUBrr if the operands are the same.
    2602             : /// CMPri can be made redundant by SUBri if the operands are the same.
    2603             : /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
    2604             : /// This function can be extended later on.
    2605        5154 : inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
    2606             :                                         unsigned SrcReg, unsigned SrcReg2,
    2607             :                                         int ImmValue, const MachineInstr *OI) {
    2608        9821 :   if ((CmpI->getOpcode() == ARM::CMPrr ||
    2609        1158 :        CmpI->getOpcode() == ARM::t2CMPrr) &&
    2610        2284 :       (OI->getOpcode() == ARM::SUBrr ||
    2611        5202 :        OI->getOpcode() == ARM::t2SUBrr) &&
    2612          64 :       ((OI->getOperand(1).getReg() == SrcReg &&
    2613          48 :         OI->getOperand(2).getReg() == SrcReg2) ||
    2614          14 :        (OI->getOperand(1).getReg() == SrcReg2 &&
    2615          14 :         OI->getOperand(2).getReg() == SrcReg)))
    2616             :     return true;
    2617             : 
    2618        3662 :   if ((CmpI->getOpcode() == ARM::CMPri ||
    2619        3414 :        CmpI->getOpcode() == ARM::t2CMPri) &&
    2620        6705 :       (OI->getOpcode() == ARM::SUBri ||
    2621         207 :        OI->getOpcode() == ARM::t2SUBri) &&
    2622        5341 :       OI->getOperand(1).getReg() == SrcReg &&
    2623          10 :       OI->getOperand(2).getImm() == ImmValue)
    2624             :     return true;
    2625             : 
    2626        5784 :   if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
    2627        2244 :       (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
    2628        1138 :        OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
    2629         178 :       OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
    2630        5199 :       OI->getOperand(0).getReg() == SrcReg &&
    2631          28 :       OI->getOperand(1).getReg() == SrcReg2)
    2632             :     return true;
    2633             :   return false;
    2634             : }
    2635             : 
    2636        1381 : static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
    2637        2762 :   switch (MI->getOpcode()) {
    2638             :   default: return false;
    2639          61 :   case ARM::tLSLri:
    2640             :   case ARM::tLSRri:
    2641             :   case ARM::tLSLrr:
    2642             :   case ARM::tLSRrr:
    2643             :   case ARM::tSUBrr:
    2644             :   case ARM::tADDrr:
    2645             :   case ARM::tADDi3:
    2646             :   case ARM::tADDi8:
    2647             :   case ARM::tSUBi3:
    2648             :   case ARM::tSUBi8:
    2649             :   case ARM::tMUL:
    2650          61 :     IsThumb1 = true;
    2651             :     LLVM_FALLTHROUGH;
    2652             :   case ARM::RSBrr:
    2653             :   case ARM::RSBri:
    2654             :   case ARM::RSCrr:
    2655             :   case ARM::RSCri:
    2656             :   case ARM::ADDrr:
    2657             :   case ARM::ADDri:
    2658             :   case ARM::ADCrr:
    2659             :   case ARM::ADCri:
    2660             :   case ARM::SUBrr:
    2661             :   case ARM::SUBri:
    2662             :   case ARM::SBCrr:
    2663             :   case ARM::SBCri:
    2664             :   case ARM::t2RSBri:
    2665             :   case ARM::t2ADDrr:
    2666             :   case ARM::t2ADDri:
    2667             :   case ARM::t2ADCrr:
    2668             :   case ARM::t2ADCri:
    2669             :   case ARM::t2SUBrr:
    2670             :   case ARM::t2SUBri:
    2671             :   case ARM::t2SBCrr:
    2672             :   case ARM::t2SBCri:
    2673             :   case ARM::ANDrr:
    2674             :   case ARM::ANDri:
    2675             :   case ARM::t2ANDrr:
    2676             :   case ARM::t2ANDri:
    2677             :   case ARM::ORRrr:
    2678             :   case ARM::ORRri:
    2679             :   case ARM::t2ORRrr:
    2680             :   case ARM::t2ORRri:
    2681             :   case ARM::EORrr:
    2682             :   case ARM::EORri:
    2683             :   case ARM::t2EORrr:
    2684             :   case ARM::t2EORri:
    2685             :   case ARM::t2LSRri:
    2686             :   case ARM::t2LSRrr:
    2687             :   case ARM::t2LSLri:
    2688             :   case ARM::t2LSLrr:
    2689             :     return true;
    2690             :   }
    2691             : }
    2692             : 
    2693             : /// optimizeCompareInstr - Convert the instruction supplying the argument to the
    2694             : /// comparison into one that sets the zero bit in the flags register;
    2695             : /// Remove a redundant Compare instruction if an earlier instruction can set the
    2696             : /// flags in the same way as Compare.
    2697             : /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
    2698             : /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
    2699             : /// condition code of instructions which use the flags.
    2700        2357 : bool ARMBaseInstrInfo::optimizeCompareInstr(
    2701             :     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    2702             :     int CmpValue, const MachineRegisterInfo *MRI) const {
    2703             :   // Get the unique definition of SrcReg.
    2704        2357 :   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
    2705        2357 :   if (!MI) return false;
    2706             : 
    2707             :   // Masked compares sometimes use the same register as the corresponding 'and'.
    2708        2357 :   if (CmpMask != ~0) {
    2709           0 :     if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
    2710             :       MI = nullptr;
    2711          65 :       for (MachineRegisterInfo::use_instr_iterator
    2712          46 :            UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
    2713         111 :            UI != UE; ++UI) {
    2714          65 :         if (UI->getParent() != CmpInstr.getParent())
    2715          17 :           continue;
    2716             :         MachineInstr *PotentialAND = &*UI;
    2717           0 :         if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
    2718           0 :             isPredicated(*PotentialAND))
    2719          48 :           continue;
    2720             :         MI = PotentialAND;
    2721             :         break;
    2722             :       }
    2723          46 :       if (!MI) return false;
    2724             :     }
    2725             :   }
    2726             : 
    2727             :   // Get ready to iterate backward from CmpInstr.
    2728             :   MachineBasicBlock::iterator I = CmpInstr, E = MI,
    2729        2311 :                               B = CmpInstr.getParent()->begin();
    2730             : 
    2731             :   // Early exit if CmpInstr is at the beginning of the BB.
    2732        2311 :   if (I == B) return false;
    2733             : 
    2734             :   // There are two possible candidates which can be changed to set CPSR:
    2735             :   // One is MI, the other is a SUB or ADD instruction.
    2736             :   // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
    2737             :   // ADDr[ri](r1, r2, X).
    2738             :   // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
    2739             :   MachineInstr *SubAdd = nullptr;
    2740        2156 :   if (SrcReg2 != 0)
    2741             :     // MI is not a candidate for CMPrr.
    2742             :     MI = nullptr;
    2743        1780 :   else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
    2744             :     // Conservatively refuse to convert an instruction which isn't in the same
    2745             :     // BB as the comparison.
    2746             :     // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
    2747             :     // Thus we cannot return here.
    2748         798 :     if (CmpInstr.getOpcode() == ARM::CMPri ||
    2749             :         CmpInstr.getOpcode() == ARM::t2CMPri)
    2750             :       MI = nullptr;
    2751             :     else
    2752             :       return false;
    2753             :   }
    2754             : 
    2755         347 :   bool IsThumb1 = false;
    2756        1381 :   if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
    2757             :     return false;
    2758             : 
    2759             :   // We also want to do this peephole for cases like this: if (a*b == 0),
    2760             :   // and optimise away the CMP instruction from the generated code sequence:
    2761             :   // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
    2762             :   // resulting from the select instruction, but these MOVS instructions for
    2763             :   // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
    2764             :   // However, if we only have MOVS instructions in between the CMP and the
    2765             :   // other instruction (the MULS in this example), then the CPSR is dead so we
    2766             :   // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
    2767             :   // reordering and then continue the analysis hoping we can eliminate the
    2768             :   // CMP. This peephole works on the vregs, so is still in SSA form. As a
    2769             :   // consequence, the movs won't redefine/kill the MUL operands which would
    2770             :   // make this reordering illegal.
    2771        1060 :   if (MI && IsThumb1) {
    2772             :     --I;
    2773             :     bool CanReorder = true;
    2774             :     const bool HasStmts = I != E;
    2775          65 :     for (; I != E; --I) {
    2776          14 :       if (I->getOpcode() != ARM::tMOVi8) {
    2777             :         CanReorder = false;
    2778             :         break;
    2779             :       }
    2780             :     }
    2781          61 :     if (HasStmts && CanReorder) {
    2782           2 :       MI = MI->removeFromParent();
    2783             :       E = CmpInstr;
    2784           2 :       CmpInstr.getParent()->insert(E, MI);
    2785             :     }
    2786          61 :     I = CmpInstr;
    2787             :     E = MI;
    2788             :   }
    2789             : 
    2790             :   // Check that CPSR isn't set between the comparison instruction and the one we
    2791             :   // want to change. At the same time, search for SubAdd.
    2792        1060 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    2793             :   do {
    2794             :     const MachineInstr &Instr = *--I;
    2795             : 
    2796             :     // Check whether CmpInstr can be made redundant by the current instruction.
    2797        1878 :     if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
    2798             :       SubAdd = &*I;
    2799             :       break;
    2800             :     }
    2801             : 
    2802             :     // Allow E (which was initially MI) to be SubAdd but do not search before E.
    2803        1839 :     if (I == E)
    2804             :       break;
    2805             : 
    2806        1884 :     if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
    2807             :         Instr.readsRegister(ARM::CPSR, TRI))
    2808             :       // This instruction modifies or uses CPSR after the one we want to
    2809             :       // change. We can't do this transformation.
    2810             :       return false;
    2811             : 
    2812         879 :   } while (I != B);
    2813             : 
    2814             :   // Return false if no candidates exist.
    2815         984 :   if (!MI && !SubAdd)
    2816             :     return false;
    2817             : 
    2818             :   // The single candidate is called MI.
    2819         373 :   if (!MI) MI = SubAdd;
    2820             : 
    2821             :   // We can't use a predicated instruction - it doesn't always write the flags.
    2822         373 :   if (isPredicated(*MI))
    2823             :     return false;
    2824             : 
    2825             :   // Scan forward for the use of CPSR
    2826             :   // When checking against MI: if it's a conditional code that requires
    2827             :   // checking of the V bit or C bit, then this is not safe to do.
    2828             :   // It is safe to remove CmpInstr if CPSR is redefined or killed.
    2829             :   // If we are done with the basic block, we need to check whether CPSR is
    2830             :   // live-out.
    2831             :   SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
    2832             :       OperandsToUpdate;
    2833             :   bool isSafe = false;
    2834         369 :   I = CmpInstr;
    2835         369 :   E = CmpInstr.getParent()->end();
    2836        2401 :   while (!isSafe && ++I != E) {
    2837             :     const MachineInstr &Instr = *I;
    2838        3494 :     for (unsigned IO = 0, EO = Instr.getNumOperands();
    2839        3494 :          !isSafe && IO != EO; ++IO) {
    2840        2670 :       const MachineOperand &MO = Instr.getOperand(IO);
    2841        2674 :       if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
    2842             :         isSafe = true;
    2843             :         break;
    2844             :       }
    2845        4937 :       if (!MO.isReg() || MO.getReg() != ARM::CPSR)
    2846        2271 :         continue;
    2847         395 :       if (MO.isDef()) {
    2848             :         isSafe = true;
    2849             :         break;
    2850             :       }
    2851             :       // Condition code is after the operand before CPSR except for VSELs.
    2852             :       ARMCC::CondCodes CC;
    2853             :       bool IsInstrVSel = true;
    2854         768 :       switch (Instr.getOpcode()) {
    2855         380 :       default:
    2856             :         IsInstrVSel = false;
    2857         760 :         CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
    2858         380 :         break;
    2859             :       case ARM::VSELEQD:
    2860             :       case ARM::VSELEQS:
    2861             :         CC = ARMCC::EQ;
    2862             :         break;
    2863           0 :       case ARM::VSELGTD:
    2864             :       case ARM::VSELGTS:
    2865             :         CC = ARMCC::GT;
    2866           0 :         break;
    2867           0 :       case ARM::VSELGED:
    2868             :       case ARM::VSELGES:
    2869             :         CC = ARMCC::GE;
    2870           0 :         break;
    2871           0 :       case ARM::VSELVSS:
    2872             :       case ARM::VSELVSD:
    2873             :         CC = ARMCC::VS;
    2874           0 :         break;
    2875             :       }
    2876             : 
    2877         384 :       if (SubAdd) {
    2878             :         // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
    2879             :         // on CMP needs to be updated to be based on SUB.
    2880             :         // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
    2881             :         // needs to be modified.
    2882             :         // Push the condition code operands to OperandsToUpdate.
    2883             :         // If it is safe to remove CmpInstr, the condition code of these
    2884             :         // operands will be modified.
    2885          43 :         unsigned Opc = SubAdd->getOpcode();
    2886          43 :         bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
    2887          43 :                      Opc == ARM::SUBri || Opc == ARM::t2SUBri;
    2888          50 :         if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
    2889          13 :                        SubAdd->getOperand(2).getReg() == SrcReg)) {
    2890             :           // VSel doesn't support condition code update.
    2891          19 :           if (IsInstrVSel)
    2892             :             return false;
    2893             :           // Ensure we can swap the condition.
    2894          19 :           ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
    2895          19 :           if (NewCC == ARMCC::AL)
    2896             :             return false;
    2897          19 :           OperandsToUpdate.push_back(
    2898          57 :               std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
    2899             :         }
    2900             :       } else {
    2901             :         // No SubAdd, so this is x = <op> y, z; cmp x, 0.
    2902             :         switch (CC) {
    2903             :         case ARMCC::EQ: // Z
    2904             :         case ARMCC::NE: // Z
    2905             :         case ARMCC::MI: // N
    2906             :         case ARMCC::PL: // N
    2907             :         case ARMCC::AL: // none
    2908             :           // CPSR can be used multiple times, we should continue.
    2909             :           break;
    2910             :         case ARMCC::HS: // C
    2911             :         case ARMCC::LO: // C
    2912             :         case ARMCC::VS: // V
    2913             :         case ARMCC::VC: // V
    2914             :         case ARMCC::HI: // C Z
    2915             :         case ARMCC::LS: // C Z
    2916             :         case ARMCC::GE: // N V
    2917             :         case ARMCC::LT: // N V
    2918             :         case ARMCC::GT: // Z N V
    2919             :         case ARMCC::LE: // Z N V
    2920             :           // The instruction uses the V bit or C bit which is not safe.
    2921             :           return false;
    2922             :         }
    2923             :       }
    2924             :     }
    2925             :   }
    2926             : 
    2927             :   // If CPSR is not killed nor re-defined, we should check whether it is
    2928             :   // live-out. If it is live-out, do not optimize.
    2929         352 :   if (!isSafe) {
    2930         337 :     MachineBasicBlock *MBB = CmpInstr.getParent();
    2931             :     for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
    2932         868 :              SE = MBB->succ_end(); SI != SE; ++SI)
    2933         534 :       if ((*SI)->isLiveIn(ARM::CPSR))
    2934             :         return false;
    2935             :   }
    2936             : 
    2937             :   // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
    2938             :   // set CPSR so this is represented as an explicit output)
    2939         349 :   if (!IsThumb1) {
    2940         578 :     MI->getOperand(5).setReg(ARM::CPSR);
    2941         578 :     MI->getOperand(5).setIsDef(true);
    2942             :   }
    2943             :   assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
    2944         349 :   CmpInstr.eraseFromParent();
    2945             : 
    2946             :   // Modify the condition code of operands in OperandsToUpdate.
    2947             :   // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
    2948             :   // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
    2949         365 :   for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
    2950          32 :     OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
    2951             : 
    2952             :   return true;
    2953             : }
    2954             : 
    2955       39084 : bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
    2956             :   // Do not sink MI if it might be used to optimize a redundant compare.
    2957             :   // We heuristically only look at the instruction immediately following MI to
    2958             :   // avoid potentially searching the entire basic block.
    2959       39084 :   if (isPredicated(MI))
    2960             :     return true;
    2961             :   MachineBasicBlock::const_iterator Next = &MI;
    2962             :   ++Next;
    2963             :   unsigned SrcReg, SrcReg2;
    2964             :   int CmpMask, CmpValue;
    2965       68916 :   if (Next != MI.getParent()->end() &&
    2966       39098 :       analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
    2967        6552 :       isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
    2968             :     return false;
    2969             :   return true;
    2970             : }
    2971             : 
    2972        5511 : bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
    2973             :                                      unsigned Reg,
    2974             :                                      MachineRegisterInfo *MRI) const {
    2975             :   // Fold large immediates into add, sub, or, xor.
    2976        5511 :   unsigned DefOpc = DefMI.getOpcode();
    2977        5511 :   if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
    2978             :     return false;
    2979        2546 :   if (!DefMI.getOperand(1).isImm())
    2980             :     // Could be t2MOVi32imm @xx
    2981             :     return false;
    2982             : 
    2983         556 :   if (!MRI->hasOneNonDBGUse(Reg))
    2984             :     return false;
    2985             : 
    2986         340 :   const MCInstrDesc &DefMCID = DefMI.getDesc();
    2987         680 :   if (DefMCID.hasOptionalDef()) {
    2988           0 :     unsigned NumOps = DefMCID.getNumOperands();
    2989           0 :     const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
    2990           0 :     if (MO.getReg() == ARM::CPSR && !MO.isDead())
    2991             :       // If DefMI defines CPSR and it is not dead, it's obviously not safe
    2992             :       // to delete DefMI.
    2993             :       return false;
    2994             :   }
    2995             : 
    2996         340 :   const MCInstrDesc &UseMCID = UseMI.getDesc();
    2997         680 :   if (UseMCID.hasOptionalDef()) {
    2998         154 :     unsigned NumOps = UseMCID.getNumOperands();
    2999         308 :     if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
    3000             :       // If the instruction sets the flag, do not attempt this optimization
    3001             :       // since it may change the semantics of the code.
    3002             :       return false;
    3003             :   }
    3004             : 
    3005             :   unsigned UseOpc = UseMI.getOpcode();
    3006             :   unsigned NewUseOpc = 0;
    3007         335 :   uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
    3008             :   uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
    3009             :   bool Commute = false;
    3010         335 :   switch (UseOpc) {
    3011             :   default: return false;
    3012          28 :   case ARM::SUBrr:
    3013             :   case ARM::ADDrr:
    3014             :   case ARM::ORRrr:
    3015             :   case ARM::EORrr:
    3016             :   case ARM::t2SUBrr:
    3017             :   case ARM::t2ADDrr:
    3018             :   case ARM::t2ORRrr:
    3019             :   case ARM::t2EORrr: {
    3020          28 :     Commute = UseMI.getOperand(2).getReg() != Reg;
    3021          28 :     switch (UseOpc) {
    3022             :     default: break;
    3023           5 :     case ARM::ADDrr:
    3024             :     case ARM::SUBrr:
    3025           5 :       if (UseOpc == ARM::SUBrr && Commute)
    3026             :         return false;
    3027             : 
    3028             :       // ADD/SUB are special because they're essentially the same operation, so
    3029             :       // we can handle a larger range of immediates.
    3030           5 :       if (ARM_AM::isSOImmTwoPartVal(ImmVal))
    3031           3 :         NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
    3032           2 :       else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
    3033             :         ImmVal = -ImmVal;
    3034           2 :         NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
    3035             :       } else
    3036             :         return false;
    3037             :       SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
    3038             :       SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
    3039           5 :       break;
    3040           2 :     case ARM::ORRrr:
    3041             :     case ARM::EORrr:
    3042           2 :       if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
    3043             :         return false;
    3044             :       SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
    3045             :       SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
    3046           1 :       switch (UseOpc) {
    3047             :       default: break;
    3048           1 :       case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
    3049           0 :       case ARM::EORrr: NewUseOpc = ARM::EORri; break;
    3050             :       }
    3051             :       break;
    3052          17 :     case ARM::t2ADDrr:
    3053             :     case ARM::t2SUBrr:
    3054          17 :       if (UseOpc == ARM::t2SUBrr && Commute)
    3055             :         return false;
    3056             : 
    3057             :       // ADD/SUB are special because they're essentially the same operation, so
    3058             :       // we can handle a larger range of immediates.
    3059          17 :       if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
    3060           9 :         NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
    3061           8 :       else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
    3062             :         ImmVal = -ImmVal;
    3063           2 :         NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
    3064             :       } else
    3065             :         return false;
    3066          11 :       SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
    3067             :       SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
    3068          11 :       break;
    3069           4 :     case ARM::t2ORRrr:
    3070             :     case ARM::t2EORrr:
    3071           4 :       if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
    3072             :         return false;
    3073           0 :       SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
    3074             :       SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
    3075           0 :       switch (UseOpc) {
    3076             :       default: break;
    3077           0 :       case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
    3078           0 :       case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
    3079             :       }
    3080             :       break;
    3081             :     }
    3082             :   }
    3083             :   }
    3084             : 
    3085          17 :   unsigned OpIdx = Commute ? 2 : 1;
    3086          17 :   unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
    3087             :   bool isKill = UseMI.getOperand(OpIdx).isKill();
    3088          17 :   unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
    3089          34 :   BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
    3090          17 :           NewReg)
    3091          17 :       .addReg(Reg1, getKillRegState(isKill))
    3092          17 :       .addImm(SOImmValV1)
    3093          17 :       .add(predOps(ARMCC::AL))
    3094          17 :       .add(condCodeOp());
    3095          17 :   UseMI.setDesc(get(NewUseOpc));
    3096          34 :   UseMI.getOperand(1).setReg(NewReg);
    3097          17 :   UseMI.getOperand(1).setIsKill();
    3098          34 :   UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
    3099          17 :   DefMI.eraseFromParent();
    3100          17 :   return true;
    3101             : }
    3102             : 
    3103           0 : static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
    3104             :                                         const MachineInstr &MI) {
    3105           0 :   switch (MI.getOpcode()) {
    3106           0 :   default: {
    3107             :     const MCInstrDesc &Desc = MI.getDesc();
    3108           0 :     int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
    3109             :     assert(UOps >= 0 && "bad # UOps");
    3110           0 :     return UOps;
    3111             :   }
    3112             : 
    3113           0 :   case ARM::LDRrs:
    3114             :   case ARM::LDRBrs:
    3115             :   case ARM::STRrs:
    3116             :   case ARM::STRBrs: {
    3117           0 :     unsigned ShOpVal = MI.getOperand(3).getImm();
    3118             :     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3119             :     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3120           0 :     if (!isSub &&
    3121           0 :         (ShImm == 0 ||
    3122           0 :          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3123             :           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3124             :       return 1;
    3125             :     return 2;
    3126             :   }
    3127             : 
    3128           0 :   case ARM::LDRH:
    3129             :   case ARM::STRH: {
    3130           0 :     if (!MI.getOperand(2).getReg())
    3131             :       return 1;
    3132             : 
    3133           0 :     unsigned ShOpVal = MI.getOperand(3).getImm();
    3134             :     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3135             :     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3136           0 :     if (!isSub &&
    3137           0 :         (ShImm == 0 ||
    3138           0 :          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3139             :           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3140             :       return 1;
    3141             :     return 2;
    3142             :   }
    3143             : 
    3144           0 :   case ARM::LDRSB:
    3145             :   case ARM::LDRSH:
    3146           0 :     return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
    3147             : 
    3148           0 :   case ARM::LDRSB_POST:
    3149             :   case ARM::LDRSH_POST: {
    3150           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3151           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3152           0 :     return (Rt == Rm) ? 4 : 3;
    3153             :   }
    3154             : 
    3155           0 :   case ARM::LDR_PRE_REG:
    3156             :   case ARM::LDRB_PRE_REG: {
    3157           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3158           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3159           0 :     if (Rt == Rm)
    3160             :       return 3;
    3161           0 :     unsigned ShOpVal = MI.getOperand(4).getImm();
    3162             :     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3163             :     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3164           0 :     if (!isSub &&
    3165           0 :         (ShImm == 0 ||
    3166           0 :          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3167             :           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3168             :       return 2;
    3169             :     return 3;
    3170             :   }
    3171             : 
    3172           0 :   case ARM::STR_PRE_REG:
    3173             :   case ARM::STRB_PRE_REG: {
    3174           0 :     unsigned ShOpVal = MI.getOperand(4).getImm();
    3175             :     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3176             :     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3177           0 :     if (!isSub &&
    3178           0 :         (ShImm == 0 ||
    3179           0 :          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3180             :           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3181             :       return 2;
    3182             :     return 3;
    3183             :   }
    3184             : 
    3185           0 :   case ARM::LDRH_PRE:
    3186             :   case ARM::STRH_PRE: {
    3187           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3188           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3189           0 :     if (!Rm)
    3190             :       return 2;
    3191           0 :     if (Rt == Rm)
    3192             :       return 3;
    3193           0 :     return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
    3194             :   }
    3195             : 
    3196           0 :   case ARM::LDR_POST_REG:
    3197             :   case ARM::LDRB_POST_REG:
    3198             :   case ARM::LDRH_POST: {
    3199           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3200           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3201           0 :     return (Rt == Rm) ? 3 : 2;
    3202             :   }
    3203             : 
    3204             :   case ARM::LDR_PRE_IMM:
    3205             :   case ARM::LDRB_PRE_IMM:
    3206             :   case ARM::LDR_POST_IMM:
    3207             :   case ARM::LDRB_POST_IMM:
    3208             :   case ARM::STRB_POST_IMM:
    3209             :   case ARM::STRB_POST_REG:
    3210             :   case ARM::STRB_PRE_IMM:
    3211             :   case ARM::STRH_POST:
    3212             :   case ARM::STR_POST_IMM:
    3213             :   case ARM::STR_POST_REG:
    3214             :   case ARM::STR_PRE_IMM:
    3215             :     return 2;
    3216             : 
    3217           0 :   case ARM::LDRSB_PRE:
    3218             :   case ARM::LDRSH_PRE: {
    3219           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3220           0 :     if (Rm == 0)
    3221             :       return 3;
    3222           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3223           0 :     if (Rt == Rm)
    3224             :       return 4;
    3225           0 :     unsigned ShOpVal = MI.getOperand(4).getImm();
    3226             :     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3227             :     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3228           0 :     if (!isSub &&
    3229           0 :         (ShImm == 0 ||
    3230           0 :          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3231             :           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3232             :       return 3;
    3233             :     return 4;
    3234             :   }
    3235             : 
    3236           0 :   case ARM::LDRD: {
    3237           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3238           0 :     unsigned Rn = MI.getOperand(2).getReg();
    3239           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3240           0 :     if (Rm)
    3241           0 :       return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
    3242             :                                                                           : 3;
    3243           0 :     return (Rt == Rn) ? 3 : 2;
    3244             :   }
    3245             : 
    3246           0 :   case ARM::STRD: {
    3247           0 :     unsigned Rm = MI.getOperand(3).getReg();
    3248           0 :     if (Rm)
    3249           0 :       return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
    3250             :                                                                           : 3;
    3251             :     return 2;
    3252             :   }
    3253             : 
    3254           0 :   case ARM::LDRD_POST:
    3255             :   case ARM::t2LDRD_POST:
    3256           0 :     return 3;
    3257             : 
    3258           0 :   case ARM::STRD_POST:
    3259             :   case ARM::t2STRD_POST:
    3260           0 :     return 4;
    3261             : 
    3262           0 :   case ARM::LDRD_PRE: {
    3263           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3264           0 :     unsigned Rn = MI.getOperand(3).getReg();
    3265           0 :     unsigned Rm = MI.getOperand(4).getReg();
    3266           0 :     if (Rm)
    3267           0 :       return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
    3268             :                                                                           : 4;
    3269           0 :     return (Rt == Rn) ? 4 : 3;
    3270             :   }
    3271             : 
    3272           0 :   case ARM::t2LDRD_PRE: {
    3273           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3274           0 :     unsigned Rn = MI.getOperand(3).getReg();
    3275           0 :     return (Rt == Rn) ? 4 : 3;
    3276             :   }
    3277             : 
    3278           0 :   case ARM::STRD_PRE: {
    3279           0 :     unsigned Rm = MI.getOperand(4).getReg();
    3280           0 :     if (Rm)
    3281           0 :       return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
    3282             :                                                                           : 4;
    3283             :     return 3;
    3284             :   }
    3285             : 
    3286           0 :   case ARM::t2STRD_PRE:
    3287           0 :     return 3;
    3288             : 
    3289             :   case ARM::t2LDR_POST:
    3290             :   case ARM::t2LDRB_POST:
    3291             :   case ARM::t2LDRB_PRE:
    3292             :   case ARM::t2LDRSBi12:
    3293             :   case ARM::t2LDRSBi8:
    3294             :   case ARM::t2LDRSBpci:
    3295             :   case ARM::t2LDRSBs:
    3296             :   case ARM::t2LDRH_POST:
    3297             :   case ARM::t2LDRH_PRE:
    3298             :   case ARM::t2LDRSBT:
    3299             :   case ARM::t2LDRSB_POST:
    3300             :   case ARM::t2LDRSB_PRE:
    3301             :   case ARM::t2LDRSH_POST:
    3302             :   case ARM::t2LDRSH_PRE:
    3303             :   case ARM::t2LDRSHi12:
    3304             :   case ARM::t2LDRSHi8:
    3305             :   case ARM::t2LDRSHpci:
    3306             :   case ARM::t2LDRSHs:
    3307             :     return 2;
    3308             : 
    3309           0 :   case ARM::t2LDRDi8: {
    3310           0 :     unsigned Rt = MI.getOperand(0).getReg();
    3311           0 :     unsigned Rn = MI.getOperand(2).getReg();
    3312           0 :     return (Rt == Rn) ? 3 : 2;
    3313             :   }
    3314             : 
    3315             :   case ARM::t2STRB_POST:
    3316             :   case ARM::t2STRB_PRE:
    3317             :   case ARM::t2STRBs:
    3318             :   case ARM::t2STRDi8:
    3319             :   case ARM::t2STRH_POST:
    3320             :   case ARM::t2STRH_PRE:
    3321             :   case ARM::t2STRHs:
    3322             :   case ARM::t2STR_POST:
    3323             :   case ARM::t2STR_PRE:
    3324             :   case ARM::t2STRs:
    3325             :     return 2;
    3326             :   }
    3327             : }
    3328             : 
    3329             : // Return the number of 32-bit words loaded by LDM or stored by STM. If this
    3330             : // can't be easily determined return 0 (missing MachineMemOperand).
    3331             : //
    3332             : // FIXME: The current MachineInstr design does not support relying on machine
    3333             : // mem operands to determine the width of a memory access. Instead, we expect
    3334             : // the target to provide this information based on the instruction opcode and
    3335             : // operands. However, using MachineMemOperand is the best solution now for
    3336             : // two reasons:
    3337             : //
    3338             : // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
    3339             : // operands. This is much more dangerous than using the MachineMemOperand
    3340             : // sizes because CodeGen passes can insert/remove optional machine operands. In
    3341             : // fact, it's totally incorrect for preRA passes and appears to be wrong for
    3342             : // postRA passes as well.
    3343             : //
    3344             : // 2) getNumLDMAddresses is only used by the scheduling machine model and any
    3345             : // machine model that calls this should handle the unknown (zero size) case.
    3346             : //
    3347             : // Long term, we should require a target hook that verifies MachineMemOperand
    3348             : // sizes during MC lowering. That target hook should be local to MC lowering
    3349             : // because we can't ensure that it is aware of other MI forms. Doing this will
    3350             : // ensure that MachineMemOperands are correctly propagated through all passes.
    3351        1351 : unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
    3352             :   unsigned Size = 0;
    3353        1382 :   for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
    3354        1351 :                                   E = MI.memoperands_end();
    3355        1382 :        I != E; ++I) {
    3356          31 :     Size += (*I)->getSize();
    3357             :   }
    3358        1351 :   return Size / 4;
    3359             : }
    3360             : 
    3361           0 : static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
    3362             :                                                     unsigned NumRegs) {
    3363           0 :   unsigned UOps = 1 + NumRegs; // 1 for address computation.
    3364           0 :   switch (Opc) {
    3365             :   default:
    3366             :     break;
    3367           0 :   case ARM::VLDMDIA_UPD:
    3368             :   case ARM::VLDMDDB_UPD:
    3369             :   case ARM::VLDMSIA_UPD:
    3370             :   case ARM::VLDMSDB_UPD:
    3371             :   case ARM::VSTMDIA_UPD:
    3372             :   case ARM::VSTMDDB_UPD:
    3373             :   case ARM::VSTMSIA_UPD:
    3374             :   case ARM::VSTMSDB_UPD:
    3375             :   case ARM::LDMIA_UPD:
    3376             :   case ARM::LDMDA_UPD:
    3377             :   case ARM::LDMDB_UPD:
    3378             :   case ARM::LDMIB_UPD:
    3379             :   case ARM::STMIA_UPD:
    3380             :   case ARM::STMDA_UPD:
    3381             :   case ARM::STMDB_UPD:
    3382             :   case ARM::STMIB_UPD:
    3383             :   case ARM::tLDMIA_UPD:
    3384             :   case ARM::tSTMIA_UPD:
    3385             :   case ARM::t2LDMIA_UPD:
    3386             :   case ARM::t2LDMDB_UPD:
    3387             :   case ARM::t2STMIA_UPD:
    3388             :   case ARM::t2STMDB_UPD:
    3389           0 :     ++UOps; // One for base register writeback.
    3390           0 :     break;
    3391           0 :   case ARM::LDMIA_RET:
    3392             :   case ARM::tPOP_RET:
    3393             :   case ARM::t2LDMIA_RET:
    3394           0 :     UOps += 2; // One for base reg wb, one for write to pc.
    3395           0 :     break;
    3396             :   }
    3397           0 :   return UOps;
    3398             : }
    3399             : 
    3400        4351 : unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
    3401             :                                           const MachineInstr &MI) const {
    3402        4351 :   if (!ItinData || ItinData->isEmpty())
    3403             :     return 1;
    3404             : 
    3405        4351 :   const MCInstrDesc &Desc = MI.getDesc();
    3406        4351 :   unsigned Class = Desc.getSchedClass();
    3407             :   int ItinUOps = ItinData->getNumMicroOps(Class);
    3408        4351 :   if (ItinUOps >= 0) {
    3409           0 :     if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
    3410           0 :       return getNumMicroOpsSwiftLdSt(ItinData, MI);
    3411             : 
    3412           0 :     return ItinUOps;
    3413             :   }
    3414             : 
    3415             :   unsigned Opc = MI.getOpcode();
    3416        4351 :   switch (Opc) {
    3417           0 :   default:
    3418           0 :     llvm_unreachable("Unexpected multi-uops instruction!");
    3419             :   case ARM::VLDMQIA:
    3420             :   case ARM::VSTMQIA:
    3421             :     return 2;
    3422             : 
    3423             :   // The number of uOps for load / store multiple are determined by the number
    3424             :   // registers.
    3425             :   //
    3426             :   // On Cortex-A8, each pair of register loads / stores can be scheduled on the
    3427             :   // same cycle. The scheduling for the first load / store must be done
    3428             :   // separately by assuming the address is not 64-bit aligned.
    3429             :   //
    3430             :   // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
    3431             :   // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
    3432             :   // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
    3433         411 :   case ARM::VLDMDIA:
    3434             :   case ARM::VLDMDIA_UPD:
    3435             :   case ARM::VLDMDDB_UPD:
    3436             :   case ARM::VLDMSIA:
    3437             :   case ARM::VLDMSIA_UPD:
    3438             :   case ARM::VLDMSDB_UPD:
    3439             :   case ARM::VSTMDIA:
    3440             :   case ARM::VSTMDIA_UPD:
    3441             :   case ARM::VSTMDDB_UPD:
    3442             :   case ARM::VSTMSIA:
    3443             :   case ARM::VSTMSIA_UPD:
    3444             :   case ARM::VSTMSDB_UPD: {
    3445         822 :     unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
    3446         411 :     return (NumRegs / 2) + (NumRegs % 2) + 1;
    3447             :   }
    3448             : 
    3449        3940 :   case ARM::LDMIA_RET:
    3450             :   case ARM::LDMIA:
    3451             :   case ARM::LDMDA:
    3452             :   case ARM::LDMDB:
    3453             :   case ARM::LDMIB:
    3454             :   case ARM::LDMIA_UPD:
    3455             :   case ARM::LDMDA_UPD:
    3456             :   case ARM::LDMDB_UPD:
    3457             :   case ARM::LDMIB_UPD:
    3458             :   case ARM::STMIA:
    3459             :   case ARM::STMDA:
    3460             :   case ARM::STMDB:
    3461             :   case ARM::STMIB:
    3462             :   case ARM::STMIA_UPD:
    3463             :   case ARM::STMDA_UPD:
    3464             :   case ARM::STMDB_UPD:
    3465             :   case ARM::STMIB_UPD:
    3466             :   case ARM::tLDMIA:
    3467             :   case ARM::tLDMIA_UPD:
    3468             :   case ARM::tSTMIA_UPD:
    3469             :   case ARM::tPOP_RET:
    3470             :   case ARM::tPOP:
    3471             :   case ARM::tPUSH:
    3472             :   case ARM::t2LDMIA_RET:
    3473             :   case ARM::t2LDMIA:
    3474             :   case ARM::t2LDMDB:
    3475             :   case ARM::t2LDMIA_UPD:
    3476             :   case ARM::t2LDMDB_UPD:
    3477             :   case ARM::t2STMIA:
    3478             :   case ARM::t2STMDB:
    3479             :   case ARM::t2STMIA_UPD:
    3480             :   case ARM::t2STMDB_UPD: {
    3481        7880 :     unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
    3482        3940 :     switch (Subtarget.getLdStMultipleTiming()) {
    3483           0 :     case ARMSubtarget::SingleIssuePlusExtras:
    3484           0 :       return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
    3485             :     case ARMSubtarget::SingleIssue:
    3486             :       // Assume the worst.
    3487             :       return NumRegs;
    3488         259 :     case ARMSubtarget::DoubleIssue: {
    3489         259 :       if (NumRegs < 4)
    3490             :         return 2;
    3491             :       // 4 registers would be issued: 2, 2.
    3492             :       // 5 registers would be issued: 2, 2, 1.
    3493          89 :       unsigned UOps = (NumRegs / 2);
    3494          89 :       if (NumRegs % 2)
    3495          37 :         ++UOps;
    3496             :       return UOps;
    3497             :     }
    3498          64 :     case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
    3499          64 :       unsigned UOps = (NumRegs / 2);
    3500             :       // If there are odd number of registers or if it's not 64-bit aligned,
    3501             :       // then it takes an extra AGU (Address Generation Unit) cycle.
    3502          64 :       if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
    3503           0 :           (*MI.memoperands_begin())->getAlignment() < 8)
    3504          64 :         ++UOps;
    3505             :       return UOps;
    3506             :       }
    3507           0 :     }
    3508             :   }
    3509             :   }
    3510           0 :   llvm_unreachable("Didn't find the number of microops");
    3511             : }
    3512             : 
    3513             : int
    3514         202 : ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
    3515             :                                   const MCInstrDesc &DefMCID,
    3516             :                                   unsigned DefClass,
    3517             :                                   unsigned DefIdx, unsigned DefAlign) const {
    3518         404 :   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
    3519         202 :   if (RegNo <= 0)
    3520             :     // Def is the address writeback.
    3521             :     return ItinData->getOperandCycle(DefClass, DefIdx);
    3522             : 
    3523             :   int DefCycle;
    3524         202 :   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    3525             :     // (regno / 2) + (regno % 2) + 1
    3526          14 :     DefCycle = RegNo / 2 + 1;
    3527          14 :     if (RegNo % 2)
    3528           6 :       ++DefCycle;
    3529         186 :   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    3530             :     DefCycle = RegNo;
    3531             :     bool isSLoad = false;
    3532             : 
    3533           4 :     switch (DefMCID.getOpcode()) {
    3534             :     default: break;
    3535             :     case ARM::VLDMSIA:
    3536             :     case ARM::VLDMSIA_UPD:
    3537             :     case ARM::VLDMSDB_UPD:
    3538             :       isSLoad = true;
    3539             :       break;
    3540             :     }
    3541             : 
    3542             :     // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    3543             :     // then it takes an extra cycle.
    3544           0 :     if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
    3545           0 :       ++DefCycle;
    3546             :   } else {
    3547             :     // Assume the worst.
    3548         186 :     DefCycle = RegNo + 2;
    3549             :   }
    3550             : 
    3551             :   return DefCycle;
    3552             : }
    3553             : 
    3554           0 : bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
    3555           0 :   unsigned BaseReg = MI.getOperand(0).getReg();
    3556           0 :   for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
    3557             :     const auto &Op = MI.getOperand(i);
    3558           0 :     if (Op.isReg() && Op.getReg() == BaseReg)
    3559             :       return true;
    3560             :   }
    3561             :   return false;
    3562             : }
    3563             : unsigned
    3564           7 : ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
    3565             :   // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops
    3566             :   // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops)
    3567          14 :   return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
    3568             : }
    3569             : 
    3570             : int
    3571         394 : ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
    3572             :                                  const MCInstrDesc &DefMCID,
    3573             :                                  unsigned DefClass,
    3574             :                                  unsigned DefIdx, unsigned DefAlign) const {
    3575         788 :   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
    3576         394 :   if (RegNo <= 0)
    3577             :     // Def is the address writeback.
    3578             :     return ItinData->getOperandCycle(DefClass, DefIdx);
    3579             : 
    3580             :   int DefCycle;
    3581         394 :   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    3582             :     // 4 registers would be issued: 1, 2, 1.
    3583             :     // 5 registers would be issued: 1, 2, 2.
    3584          13 :     DefCycle = RegNo / 2;
    3585          13 :     if (DefCycle < 1)
    3586             :       DefCycle = 1;
    3587             :     // Result latency is issue cycle + 2: E2.
    3588          13 :     DefCycle += 2;
    3589         375 :   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    3590           6 :     DefCycle = (RegNo / 2);
    3591             :     // If there are odd number of registers or if it's not 64-bit aligned,
    3592             :     // then it takes an extra AGU (Address Generation Unit) cycle.
    3593           6 :     if ((RegNo % 2) || DefAlign < 8)
    3594           6 :       ++DefCycle;
    3595             :     // Result latency is AGU cycles + 2.
    3596           6 :     DefCycle += 2;
    3597             :   } else {
    3598             :     // Assume the worst.
    3599         375 :     DefCycle = RegNo + 2;
    3600             :   }
    3601             : 
    3602             :   return DefCycle;
    3603             : }
    3604             : 
    3605             : int
    3606          12 : ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
    3607             :                                   const MCInstrDesc &UseMCID,
    3608             :                                   unsigned UseClass,
    3609             :                                   unsigned UseIdx, unsigned UseAlign) const {
    3610          24 :   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
    3611          12 :   if (RegNo <= 0)
    3612             :     return ItinData->getOperandCycle(UseClass, UseIdx);
    3613             : 
    3614             :   int UseCycle;
    3615          12 :   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    3616             :     // (regno / 2) + (regno % 2) + 1
    3617           6 :     UseCycle = RegNo / 2 + 1;
    3618           6 :     if (RegNo % 2)
    3619           1 :       ++UseCycle;
    3620           3 :   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    3621             :     UseCycle = RegNo;
    3622             :     bool isSStore = false;
    3623             : 
    3624           6 :     switch (UseMCID.getOpcode()) {
    3625             :     default: break;
    3626             :     case ARM::VSTMSIA:
    3627             :     case ARM::VSTMSIA_UPD:
    3628             :     case ARM::VSTMSDB_UPD:
    3629             :       isSStore = true;
    3630             :       break;
    3631             :     }
    3632             : 
    3633             :     // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    3634             :     // then it takes an extra cycle.
    3635           0 :     if ((isSStore && (RegNo % 2)) || UseAlign < 8)
    3636           0 :       ++UseCycle;
    3637             :   } else {
    3638             :     // Assume the worst.
    3639           3 :     UseCycle = RegNo + 2;
    3640             :   }
    3641             : 
    3642             :   return UseCycle;
    3643             : }
    3644             : 
    3645             : int
    3646         284 : ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
    3647             :                                  const MCInstrDesc &UseMCID,
    3648             :                                  unsigned UseClass,
    3649             :                                  unsigned UseIdx, unsigned UseAlign) const {
    3650         568 :   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
    3651         284 :   if (RegNo <= 0)
    3652             :     return ItinData->getOperandCycle(UseClass, UseIdx);
    3653             : 
    3654             :   int UseCycle;
    3655         284 :   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    3656          15 :     UseCycle = RegNo / 2;
    3657          15 :     if (UseCycle < 2)
    3658             :       UseCycle = 2;
    3659             :     // Read in E3.
    3660          15 :     UseCycle += 2;
    3661         269 :   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    3662           0 :     UseCycle = (RegNo / 2);
    3663             :     // If there are odd number of registers or if it's not 64-bit aligned,
    3664             :     // then it takes an extra AGU (Address Generation Unit) cycle.
    3665           0 :     if ((RegNo % 2) || UseAlign < 8)
    3666           0 :       ++UseCycle;
    3667             :   } else {
    3668             :     // Assume the worst.
    3669             :     UseCycle = 1;
    3670             :   }
    3671             :   return UseCycle;
    3672             : }
    3673             : 
    3674             : int
    3675       66582 : ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    3676             :                                     const MCInstrDesc &DefMCID,
    3677             :                                     unsigned DefIdx, unsigned DefAlign,
    3678             :                                     const MCInstrDesc &UseMCID,
    3679             :                                     unsigned UseIdx, unsigned UseAlign) const {
    3680       66582 :   unsigned DefClass = DefMCID.getSchedClass();
    3681       66582 :   unsigned UseClass = UseMCID.getSchedClass();
    3682             : 
    3683      198951 :   if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
    3684       64277 :     return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
    3685             : 
    3686             :   // This may be a def / use of a variable_ops instruction, the operand
    3687             :   // latency might be determinable dynamically. Let the target try to
    3688             :   // figure it out.
    3689             :   int DefCycle = -1;
    3690             :   bool LdmBypass = false;
    3691        4610 :   switch (DefMCID.getOpcode()) {
    3692             :   default:
    3693             :     DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    3694             :     break;
    3695             : 
    3696         202 :   case ARM::VLDMDIA:
    3697             :   case ARM::VLDMDIA_UPD:
    3698             :   case ARM::VLDMDDB_UPD:
    3699             :   case ARM::VLDMSIA:
    3700             :   case ARM::VLDMSIA_UPD:
    3701             :   case ARM::VLDMSDB_UPD:
    3702         202 :     DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    3703         202 :     break;
    3704             : 
    3705         394 :   case ARM::LDMIA_RET:
    3706             :   case ARM::LDMIA:
    3707             :   case ARM::LDMDA:
    3708             :   case ARM::LDMDB:
    3709             :   case ARM::LDMIB:
    3710             :   case ARM::LDMIA_UPD:
    3711             :   case ARM::LDMDA_UPD:
    3712             :   case ARM::LDMDB_UPD:
    3713             :   case ARM::LDMIB_UPD:
    3714             :   case ARM::tLDMIA:
    3715             :   case ARM::tLDMIA_UPD:
    3716             :   case ARM::tPUSH:
    3717             :   case ARM::t2LDMIA_RET:
    3718             :   case ARM::t2LDMIA:
    3719             :   case ARM::t2LDMDB:
    3720             :   case ARM::t2LDMIA_UPD:
    3721             :   case ARM::t2LDMDB_UPD:
    3722             :     LdmBypass = true;
    3723         394 :     DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    3724         394 :     break;
    3725             :   }
    3726             : 
    3727        2305 :   if (DefCycle == -1)
    3728             :     // We can't seem to determine the result latency of the def, assume it's 2.
    3729             :     DefCycle = 2;
    3730             : 
    3731             :   int UseCycle = -1;
    3732        4610 :   switch (UseMCID.getOpcode()) {
    3733             :   default:
    3734             :     UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    3735         469 :     break;
    3736             : 
    3737          12 :   case ARM::VSTMDIA:
    3738             :   case ARM::VSTMDIA_UPD:
    3739             :   case ARM::VSTMDDB_UPD:
    3740             :   case ARM::VSTMSIA:
    3741             :   case ARM::VSTMSIA_UPD:
    3742             :   case ARM::VSTMSDB_UPD:
    3743          12 :     UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    3744          12 :     break;
    3745             : 
    3746         284 :   case ARM::STMIA:
    3747             :   case ARM::STMDA:
    3748             :   case ARM::STMDB:
    3749             :   case ARM::STMIB:
    3750             :   case ARM::STMIA_UPD:
    3751             :   case ARM::STMDA_UPD:
    3752             :   case ARM::STMDB_UPD:
    3753             :   case ARM::STMIB_UPD:
    3754             :   case ARM::tSTMIA_UPD:
    3755             :   case ARM::tPOP_RET:
    3756             :   case ARM::tPOP:
    3757             :   case ARM::t2STMIA:
    3758             :   case ARM::t2STMDB:
    3759             :   case ARM::t2STMIA_UPD:
    3760             :   case ARM::t2STMDB_UPD:
    3761         284 :     UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    3762         284 :     break;
    3763             :   }
    3764             : 
    3765         765 :   if (UseCycle == -1)
    3766             :     // Assume it's read in the first stage.
    3767             :     UseCycle = 1;
    3768             : 
    3769        2305 :   UseCycle = DefCycle - UseCycle + 1;
    3770        2305 :   if (UseCycle > 0) {
    3771        2224 :     if (LdmBypass) {
    3772             :       // It's a variable_ops instruction so we can't use DefIdx here. Just use
    3773             :       // first def operand.
    3774         788 :       if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
    3775             :                                           UseClass, UseIdx))
    3776             :         --UseCycle;
    3777        1830 :     } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
    3778             :                                                UseClass, UseIdx)) {
    3779             :       --UseCycle;
    3780             :     }
    3781             :   }
    3782             : 
    3783             :   return UseCycle;
    3784             : }
    3785             : 
    3786         344 : static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
    3787             :                                            const MachineInstr *MI, unsigned Reg,
    3788             :                                            unsigned &DefIdx, unsigned &Dist) {
    3789         344 :   Dist = 0;
    3790             : 
    3791             :   MachineBasicBlock::const_iterator I = MI; ++I;
    3792             :   MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
    3793             :   assert(II->isInsideBundle() && "Empty bundle?");
    3794             : 
    3795             :   int Idx = -1;
    3796         370 :   while (II->isInsideBundle()) {
    3797         357 :     Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
    3798         357 :     if (Idx != -1)
    3799             :       break;
    3800             :     --II;
    3801          13 :     ++Dist;
    3802             :   }
    3803             : 
    3804             :   assert(Idx != -1 && "Cannot find bundled definition!");
    3805         344 :   DefIdx = Idx;
    3806         344 :   return &*II;
    3807             : }
    3808             : 
    3809        1003 : static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
    3810             :                                            const MachineInstr &MI, unsigned Reg,
    3811             :                                            unsigned &UseIdx, unsigned &Dist) {
    3812        1003 :   Dist = 0;
    3813             : 
    3814        1003 :   MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
    3815             :   assert(II->isInsideBundle() && "Empty bundle?");
    3816        1003 :   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    3817             : 
    3818             :   // FIXME: This doesn't properly handle multiple uses.
    3819             :   int Idx = -1;
    3820        4052 :   while (II != E && II->isInsideBundle()) {
    3821        2025 :     Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
    3822        2025 :     if (Idx != -1)
    3823             :       break;
    3824        2046 :     if (II->getOpcode() != ARM::t2IT)
    3825          20 :       ++Dist;
    3826             :     ++II;
    3827             :   }
    3828             : 
    3829        1003 :   if (Idx == -1) {
    3830           1 :     Dist = 0;
    3831           1 :     return nullptr;
    3832             :   }
    3833             : 
    3834        1002 :   UseIdx = Idx;
    3835        1002 :   return &*II;
    3836             : }
    3837             : 
    3838             : /// Return the number of cycles to add to (or subtract from) the static
    3839             : /// itinerary based on the def opcode and alignment. The caller will ensure that
    3840             : /// adjusted latency is at least one cycle.
    3841      152109 : static int adjustDefLatency(const ARMSubtarget &Subtarget,
    3842             :                             const MachineInstr &DefMI,
    3843             :                             const MCInstrDesc &DefMCID, unsigned DefAlign) {
    3844             :   int Adjust = 0;
    3845      283551 :   if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
    3846             :     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    3847             :     // variants are one cycle cheaper.
    3848       43468 :     switch (DefMCID.getOpcode()) {
    3849             :     default: break;
    3850          58 :     case ARM::LDRrs:
    3851             :     case ARM::LDRBrs: {
    3852          58 :       unsigned ShOpVal = DefMI.getOperand(3).getImm();
    3853             :       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3854          58 :       if (ShImm == 0 ||
    3855          27 :           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
    3856             :         --Adjust;
    3857             :       break;
    3858             :     }
    3859          67 :     case ARM::t2LDRs:
    3860             :     case ARM::t2LDRBs:
    3861             :     case ARM::t2LDRHs:
    3862             :     case ARM::t2LDRSHs: {
    3863             :       // Thumb2 mode: lsl only.
    3864          67 :       unsigned ShAmt = DefMI.getOperand(3).getImm();
    3865          67 :       if (ShAmt == 0 || ShAmt == 2)
    3866             :         --Adjust;
    3867             :       break;
    3868             :     }
    3869             :     }
    3870      130375 :   } else if (Subtarget.isSwift()) {
    3871             :     // FIXME: Properly handle all of the latency adjustments for address
    3872             :     // writeback.
    3873           6 :     switch (DefMCID.getOpcode()) {
    3874             :     default: break;
    3875           0 :     case ARM::LDRrs:
    3876             :     case ARM::LDRBrs: {
    3877           0 :       unsigned ShOpVal = DefMI.getOperand(3).getImm();
    3878             :       bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    3879             :       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    3880           0 :       if (!isSub &&
    3881           0 :           (ShImm == 0 ||
    3882           0 :            ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    3883             :             ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    3884             :         Adjust -= 2;
    3885           0 :       else if (!isSub &&
    3886           0 :                ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
    3887             :         --Adjust;
    3888             :       break;
    3889             :     }
    3890           0 :     case ARM::t2LDRs:
    3891             :     case ARM::t2LDRBs:
    3892             :     case ARM::t2LDRHs:
    3893             :     case ARM::t2LDRSHs: {
    3894             :       // Thumb2 mode: lsl only.
    3895           0 :       unsigned ShAmt = DefMI.getOperand(3).getImm();
    3896           0 :       if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
    3897             :         Adjust -= 2;
    3898             :       break;
    3899             :     }
    3900             :     }
    3901             :   }
    3902             : 
    3903      152109 :   if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
    3904        8226 :     switch (DefMCID.getOpcode()) {
    3905             :     default: break;
    3906         198 :     case ARM::VLD1q8:
    3907             :     case ARM::VLD1q16:
    3908             :     case ARM::VLD1q32:
    3909             :     case ARM::VLD1q64:
    3910             :     case ARM::VLD1q8wb_fixed:
    3911             :     case ARM::VLD1q16wb_fixed:
    3912             :     case ARM::VLD1q32wb_fixed:
    3913             :     case ARM::VLD1q64wb_fixed:
    3914             :     case ARM::VLD1q8wb_register:
    3915             :     case ARM::VLD1q16wb_register:
    3916             :     case ARM::VLD1q32wb_register:
    3917             :     case ARM::VLD1q64wb_register:
    3918             :     case ARM::VLD2d8:
    3919             :     case ARM::VLD2d16:
    3920             :     case ARM::VLD2d32:
    3921             :     case ARM::VLD2q8:
    3922             :     case ARM::VLD2q16:
    3923             :     case ARM::VLD2q32:
    3924             :     case ARM::VLD2d8wb_fixed:
    3925             :     case ARM::VLD2d16wb_fixed:
    3926             :     case ARM::VLD2d32wb_fixed:
    3927             :     case ARM::VLD2q8wb_fixed:
    3928             :     case ARM::VLD2q16wb_fixed:
    3929             :     case ARM::VLD2q32wb_fixed:
    3930             :     case ARM::VLD2d8wb_register:
    3931             :     case ARM::VLD2d16wb_register:
    3932             :     case ARM::VLD2d32wb_register:
    3933             :     case ARM::VLD2q8wb_register:
    3934             :     case ARM::VLD2q16wb_register:
    3935             :     case ARM::VLD2q32wb_register:
    3936             :     case ARM::VLD3d8:
    3937             :     case ARM::VLD3d16:
    3938             :     case ARM::VLD3d32:
    3939             :     case ARM::VLD1d64T:
    3940             :     case ARM::VLD3d8_UPD:
    3941             :     case ARM::VLD3d16_UPD:
    3942             :     case ARM::VLD3d32_UPD:
    3943             :     case ARM::VLD1d64Twb_fixed:
    3944             :     case ARM::VLD1d64Twb_register:
    3945             :     case ARM::VLD3q8_UPD:
    3946             :     case ARM::VLD3q16_UPD:
    3947             :     case ARM::VLD3q32_UPD:
    3948             :     case ARM::VLD4d8:
    3949             :     case ARM::VLD4d16:
    3950             :     case ARM::VLD4d32:
    3951             :     case ARM::VLD1d64Q:
    3952             :     case ARM::VLD4d8_UPD:
    3953             :     case ARM::VLD4d16_UPD:
    3954             :     case ARM::VLD4d32_UPD:
    3955             :     case ARM::VLD1d64Qwb_fixed:
    3956             :     case ARM::VLD1d64Qwb_register:
    3957             :     case ARM::VLD4q8_UPD:
    3958             :     case ARM::VLD4q16_UPD:
    3959             :     case ARM::VLD4q32_UPD:
    3960             :     case ARM::VLD1DUPq8:
    3961             :     case ARM::VLD1DUPq16:
    3962             :     case ARM::VLD1DUPq32:
    3963             :     case ARM::VLD1DUPq8wb_fixed:
    3964             :     case ARM::VLD1DUPq16wb_fixed:
    3965             :     case ARM::VLD1DUPq32wb_fixed:
    3966             :     case ARM::VLD1DUPq8wb_register:
    3967             :     case ARM::VLD1DUPq16wb_register:
    3968             :     case ARM::VLD1DUPq32wb_register:
    3969             :     case ARM::VLD2DUPd8:
    3970             :     case ARM::VLD2DUPd16:
    3971             :     case ARM::VLD2DUPd32:
    3972             :     case ARM::VLD2DUPd8wb_fixed:
    3973             :     case ARM::VLD2DUPd16wb_fixed:
    3974             :     case ARM::VLD2DUPd32wb_fixed:
    3975             :     case ARM::VLD2DUPd8wb_register:
    3976             :     case ARM::VLD2DUPd16wb_register:
    3977             :     case ARM::VLD2DUPd32wb_register:
    3978             :     case ARM::VLD4DUPd8:
    3979             :     case ARM::VLD4DUPd16:
    3980             :     case ARM::VLD4DUPd32:
    3981             :     case ARM::VLD4DUPd8_UPD:
    3982             :     case ARM::VLD4DUPd16_UPD:
    3983             :     case ARM::VLD4DUPd32_UPD:
    3984             :     case ARM::VLD1LNd8:
    3985             :     case ARM::VLD1LNd16:
    3986             :     case ARM::VLD1LNd32:
    3987             :     case ARM::VLD1LNd8_UPD:
    3988             :     case ARM::VLD1LNd16_UPD:
    3989             :     case ARM::VLD1LNd32_UPD:
    3990             :     case ARM::VLD2LNd8:
    3991             :     case ARM::VLD2LNd16:
    3992             :     case ARM::VLD2LNd32:
    3993             :     case ARM::VLD2LNq16:
    3994             :     case ARM::VLD2LNq32:
    3995             :     case ARM::VLD2LNd8_UPD:
    3996             :     case ARM::VLD2LNd16_UPD:
    3997             :     case ARM::VLD2LNd32_UPD:
    3998             :     case ARM::VLD2LNq16_UPD:
    3999             :     case ARM::VLD2LNq32_UPD:
    4000             :     case ARM::VLD4LNd8:
    4001             :     case ARM::VLD4LNd16:
    4002             :     case ARM::VLD4LNd32:
    4003             :     case ARM::VLD4LNq16:
    4004             :     case ARM::VLD4LNq32:
    4005             :     case ARM::VLD4LNd8_UPD:
    4006             :     case ARM::VLD4LNd16_UPD:
    4007             :     case ARM::VLD4LNd32_UPD:
    4008             :     case ARM::VLD4LNq16_UPD:
    4009             :     case ARM::VLD4LNq32_UPD:
    4010             :       // If the address is not 64-bit aligned, the latencies of these
    4011             :       // instructions increases by one.
    4012         198 :       ++Adjust;
    4013         198 :       break;
    4014             :     }
    4015             :   }
    4016      152109 :   return Adjust;
    4017             : }
    4018             : 
    4019       56280 : int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    4020             :                                         const MachineInstr &DefMI,
    4021             :                                         unsigned DefIdx,
    4022             :                                         const MachineInstr &UseMI,
    4023             :                                         unsigned UseIdx) const {
    4024             :   // No operand latency. The caller may fall back to getInstrLatency.
    4025       56280 :   if (!ItinData || ItinData->isEmpty())
    4026             :     return -1;
    4027             : 
    4028       56280 :   const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
    4029       56280 :   unsigned Reg = DefMO.getReg();
    4030             : 
    4031             :   const MachineInstr *ResolvedDefMI = &DefMI;
    4032       56280 :   unsigned DefAdj = 0;
    4033       56280 :   if (DefMI.isBundle())
    4034         344 :     ResolvedDefMI =
    4035         344 :         getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
    4036       56249 :   if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
    4037       56231 :       ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
    4038             :     return 1;
    4039             :   }
    4040             : 
    4041             :   const MachineInstr *ResolvedUseMI = &UseMI;
    4042       56229 :   unsigned UseAdj = 0;
    4043       56229 :   if (UseMI.isBundle()) {
    4044        1003 :     ResolvedUseMI =
    4045        1003 :         getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
    4046        1003 :     if (!ResolvedUseMI)
    4047             :       return -1;
    4048             :   }
    4049             : 
    4050       56228 :   return getOperandLatencyImpl(
    4051             :       ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
    4052       56228 :       Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
    4053             : }
    4054             : 
    4055       56228 : int ARMBaseInstrInfo::getOperandLatencyImpl(
    4056             :     const InstrItineraryData *ItinData, const MachineInstr &DefMI,
    4057             :     unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
    4058             :     const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
    4059             :     unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
    4060       56228 :   if (Reg == ARM::CPSR) {
    4061        3738 :     if (DefMI.getOpcode() == ARM::FMSTAT) {
    4062             :       // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
    4063         457 :       return Subtarget.isLikeA9() ? 1 : 20;
    4064             :     }
    4065             : 
    4066             :     // CPSR set and branch can be paired in the same cycle.
    4067        1412 :     if (UseMI.isBranch())
    4068             :       return 0;
    4069             : 
    4070             :     // Otherwise it takes the instruction latency (generally one).
    4071        1412 :     unsigned Latency = getInstrLatency(ItinData, DefMI);
    4072             : 
    4073             :     // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
    4074             :     // its uses. Instructions which are otherwise scheduled between them may
    4075             :     // incur a code size penalty (not able to use the CPSR setting 16-bit
    4076             :     // instructions).
    4077        1412 :     if (Latency > 0 && Subtarget.isThumb2()) {
    4078         475 :       const MachineFunction *MF = DefMI.getParent()->getParent();
    4079             :       // FIXME: Use Function::optForSize().
    4080         950 :       if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
    4081          28 :         --Latency;
    4082             :     }
    4083        1412 :     return Latency;
    4084             :   }
    4085             : 
    4086       99798 :   if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
    4087             :     return -1;
    4088             : 
    4089       40117 :   unsigned DefAlign = DefMI.hasOneMemOperand()
    4090       10352 :                           ? (*DefMI.memoperands_begin())->getAlignment()
    4091       50469 :                           : 0;
    4092       40117 :   unsigned UseAlign = UseMI.hasOneMemOperand()
    4093        7524 :                           ? (*UseMI.memoperands_begin())->getAlignment()
    4094       47641 :                           : 0;
    4095             : 
    4096             :   // Get the itinerary's latency if possible, and handle variable_ops.
    4097             :   int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
    4098       40117 :                                   UseIdx, UseAlign);
    4099             :   // Unable to find operand latency. The caller may resort to getInstrLatency.
    4100       40117 :   if (Latency < 0)
    4101             :     return Latency;
    4102             : 
    4103             :   // Adjust for IT block position.
    4104       33478 :   int Adj = DefAdj + UseAdj;
    4105             : 
    4106             :   // Adjust for dynamic def-side opcode variants not captured by the itinerary.
    4107       33478 :   Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
    4108       33478 :   if (Adj >= 0 || (int)Latency > -Adj) {
    4109       33478 :     return Latency + Adj;
    4110             :   }
    4111             :   // Return the itinerary latency, which may be zero but not less than zero.
    4112             :   return Latency;
    4113             : }
    4114             : 
    4115             : int
    4116       69322 : ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    4117             :                                     SDNode *DefNode, unsigned DefIdx,
    4118             :                                     SDNode *UseNode, unsigned UseIdx) const {
    4119       69322 :   if (!DefNode->isMachineOpcode())
    4120             :     return 1;
    4121             : 
    4122       48839 :   const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
    4123             : 
    4124       48839 :   if (isZeroCost(DefMCID.Opcode))
    4125             :     return 0;
    4126             : 
    4127       41784 :   if (!ItinData || ItinData->isEmpty())
    4128        1606 :     return DefMCID.mayLoad() ? 3 : 1;
    4129             : 
    4130       40981 :   if (!UseNode->isMachineOpcode()) {
    4131       14516 :     int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
    4132       14516 :     int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
    4133       14516 :     int Threshold = 1 + Adj;
    4134       14516 :     return Latency <= Threshold ? 1 : Latency - Adj;
    4135             :   }
    4136             : 
    4137             :   const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
    4138             :   const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
    4139       26465 :   unsigned DefAlign = !DefMN->memoperands_empty()
    4140       26465 :     ? (*DefMN->memoperands_begin())->getAlignment() : 0;
    4141             :   const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
    4142       26465 :   unsigned UseAlign = !UseMN->memoperands_empty()
    4143       26465 :     ? (*UseMN->memoperands_begin())->getAlignment() : 0;
    4144             :   int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
    4145       26465 :                                   UseMCID, UseIdx, UseAlign);
    4146             : 
    4147       38856 :   if (Latency > 1 &&
    4148       23153 :       (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
    4149             :        Subtarget.isCortexA7())) {
    4150             :     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    4151             :     // variants are one cycle cheaper.
    4152        3396 :     switch (DefMCID.getOpcode()) {
    4153             :     default: break;
    4154           9 :     case ARM::LDRrs:
    4155             :     case ARM::LDRBrs: {
    4156             :       unsigned ShOpVal =
    4157          27 :         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
    4158             :       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    4159           9 :       if (ShImm == 0 ||
    4160           7 :           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
    4161           9 :         --Latency;
    4162             :       break;
    4163             :     }
    4164          18 :     case ARM::t2LDRs:
    4165             :     case ARM::t2LDRBs:
    4166             :     case ARM::t2LDRHs:
    4167             :     case ARM::t2LDRSHs: {
    4168             :       // Thumb2 mode: lsl only.
    4169             :       unsigned ShAmt =
    4170          54 :         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
    4171          18 :       if (ShAmt == 0 || ShAmt == 2)
    4172          17 :         --Latency;
    4173             :       break;
    4174             :     }
    4175             :     }
    4176       28841 :   } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
    4177             :     // FIXME: Properly handle all of the latency adjustments for address
    4178             :     // writeback.
    4179           0 :     switch (DefMCID.getOpcode()) {
    4180             :     default: break;
    4181           0 :     case ARM::LDRrs:
    4182             :     case ARM::LDRBrs: {
    4183             :       unsigned ShOpVal =
    4184           0 :         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
    4185             :       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    4186           0 :       if (ShImm == 0 ||
    4187           0 :           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
    4188             :            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
    4189           0 :         Latency -= 2;
    4190           0 :       else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
    4191           0 :         --Latency;
    4192             :       break;
    4193             :     }
    4194           0 :     case ARM::t2LDRs:
    4195             :     case ARM::t2LDRBs:
    4196             :     case ARM::t2LDRHs:
    4197             :     case ARM::t2LDRSHs:
    4198             :       // Thumb2 mode: lsl 0-3 only.
    4199           0 :       Latency -= 2;
    4200           0 :       break;
    4201             :     }
    4202             :   }
    4203             : 
    4204       26465 :   if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
    4205        1908 :     switch (DefMCID.getOpcode()) {
    4206             :     default: break;
    4207          19 :     case ARM::VLD1q8:
    4208             :     case ARM::VLD1q16:
    4209             :     case ARM::VLD1q32:
    4210             :     case ARM::VLD1q64:
    4211             :     case ARM::VLD1q8wb_register:
    4212             :     case ARM::VLD1q16wb_register:
    4213             :     case ARM::VLD1q32wb_register:
    4214             :     case ARM::VLD1q64wb_register:
    4215             :     case ARM::VLD1q8wb_fixed:
    4216             :     case ARM::VLD1q16wb_fixed:
    4217             :     case ARM::VLD1q32wb_fixed:
    4218             :     case ARM::VLD1q64wb_fixed:
    4219             :     case ARM::VLD2d8:
    4220             :     case ARM::VLD2d16:
    4221             :     case ARM::VLD2d32:
    4222             :     case ARM::VLD2q8Pseudo:
    4223             :     case ARM::VLD2q16Pseudo:
    4224             :     case ARM::VLD2q32Pseudo:
    4225             :     case ARM::VLD2d8wb_fixed:
    4226             :     case ARM::VLD2d16wb_fixed:
    4227             :     case ARM::VLD2d32wb_fixed:
    4228             :     case ARM::VLD2q8PseudoWB_fixed:
    4229             :     case ARM::VLD2q16PseudoWB_fixed:
    4230             :     case ARM::VLD2q32PseudoWB_fixed:
    4231             :     case ARM::VLD2d8wb_register:
    4232             :     case ARM::VLD2d16wb_register:
    4233             :     case ARM::VLD2d32wb_register:
    4234             :     case ARM::VLD2q8PseudoWB_register:
    4235             :     case ARM::VLD2q16PseudoWB_register:
    4236             :     case ARM::VLD2q32PseudoWB_register:
    4237             :     case ARM::VLD3d8Pseudo:
    4238             :     case ARM::VLD3d16Pseudo:
    4239             :     case ARM::VLD3d32Pseudo:
    4240             :     case ARM::VLD1d8TPseudo:
    4241             :     case ARM::VLD1d16TPseudo:
    4242             :     case ARM::VLD1d32TPseudo:
    4243             :     case ARM::VLD1d64TPseudo:
    4244             :     case ARM::VLD1d64TPseudoWB_fixed:
    4245             :     case ARM::VLD1d64TPseudoWB_register:
    4246             :     case ARM::VLD3d8Pseudo_UPD:
    4247             :     case ARM::VLD3d16Pseudo_UPD:
    4248             :     case ARM::VLD3d32Pseudo_UPD:
    4249             :     case ARM::VLD3q8Pseudo_UPD:
    4250             :     case ARM::VLD3q16Pseudo_UPD:
    4251             :     case ARM::VLD3q32Pseudo_UPD:
    4252             :     case ARM::VLD3q8oddPseudo:
    4253             :     case ARM::VLD3q16oddPseudo:
    4254             :     case ARM::VLD3q32oddPseudo:
    4255             :     case ARM::VLD3q8oddPseudo_UPD:
    4256             :     case ARM::VLD3q16oddPseudo_UPD:
    4257             :     case ARM::VLD3q32oddPseudo_UPD:
    4258             :     case ARM::VLD4d8Pseudo:
    4259             :     case ARM::VLD4d16Pseudo:
    4260             :     case ARM::VLD4d32Pseudo:
    4261             :     case ARM::VLD1d8QPseudo:
    4262             :     case ARM::VLD1d16QPseudo:
    4263             :     case ARM::VLD1d32QPseudo:
    4264             :     case ARM::VLD1d64QPseudo:
    4265             :     case ARM::VLD1d64QPseudoWB_fixed:
    4266             :     case ARM::VLD1d64QPseudoWB_register:
    4267             :     case ARM::VLD1q8HighQPseudo:
    4268             :     case ARM::VLD1q8LowQPseudo_UPD:
    4269             :     case ARM::VLD1q8HighTPseudo:
    4270             :     case ARM::VLD1q8LowTPseudo_UPD:
    4271             :     case ARM::VLD1q16HighQPseudo:
    4272             :     case ARM::VLD1q16LowQPseudo_UPD:
    4273             :     case ARM::VLD1q16HighTPseudo:
    4274             :     case ARM::VLD1q16LowTPseudo_UPD:
    4275             :     case ARM::VLD1q32HighQPseudo:
    4276             :     case ARM::VLD1q32LowQPseudo_UPD:
    4277             :     case ARM::VLD1q32HighTPseudo:
    4278             :     case ARM::VLD1q32LowTPseudo_UPD:
    4279             :     case ARM::VLD1q64HighQPseudo:
    4280             :     case ARM::VLD1q64LowQPseudo_UPD:
    4281             :     case ARM::VLD1q64HighTPseudo:
    4282             :     case ARM::VLD1q64LowTPseudo_UPD:
    4283             :     case ARM::VLD4d8Pseudo_UPD:
    4284             :     case ARM::VLD4d16Pseudo_UPD:
    4285             :     case ARM::VLD4d32Pseudo_UPD:
    4286             :     case ARM::VLD4q8Pseudo_UPD:
    4287             :     case ARM::VLD4q16Pseudo_UPD:
    4288             :     case ARM::VLD4q32Pseudo_UPD:
    4289             :     case ARM::VLD4q8oddPseudo:
    4290             :     case ARM::VLD4q16oddPseudo:
    4291             :     case ARM::VLD4q32oddPseudo:
    4292             :     case ARM::VLD4q8oddPseudo_UPD:
    4293             :     case ARM::VLD4q16oddPseudo_UPD:
    4294             :     case ARM::VLD4q32oddPseudo_UPD:
    4295             :     case ARM::VLD1DUPq8:
    4296             :     case ARM::VLD1DUPq16:
    4297             :     case ARM::VLD1DUPq32:
    4298             :     case ARM::VLD1DUPq8wb_fixed:
    4299             :     case ARM::VLD1DUPq16wb_fixed:
    4300             :     case ARM::VLD1DUPq32wb_fixed:
    4301             :     case ARM::VLD1DUPq8wb_register:
    4302             :     case ARM::VLD1DUPq16wb_register:
    4303             :     case ARM::VLD1DUPq32wb_register:
    4304             :     case ARM::VLD2DUPd8:
    4305             :     case ARM::VLD2DUPd16:
    4306             :     case ARM::VLD2DUPd32:
    4307             :     case ARM::VLD2DUPd8wb_fixed:
    4308             :     case ARM::VLD2DUPd16wb_fixed:
    4309             :     case ARM::VLD2DUPd32wb_fixed:
    4310             :     case ARM::VLD2DUPd8wb_register:
    4311             :     case ARM::VLD2DUPd16wb_register:
    4312             :     case ARM::VLD2DUPd32wb_register:
    4313             :     case ARM::VLD4DUPd8Pseudo:
    4314             :     case ARM::VLD4DUPd16Pseudo:
    4315             :     case ARM::VLD4DUPd32Pseudo:
    4316             :     case ARM::VLD4DUPd8Pseudo_UPD:
    4317             :     case ARM::VLD4DUPd16Pseudo_UPD:
    4318             :     case ARM::VLD4DUPd32Pseudo_UPD:
    4319             :     case ARM::VLD1LNq8Pseudo:
    4320             :     case ARM::VLD1LNq16Pseudo:
    4321             :     case ARM::VLD1LNq32Pseudo:
    4322             :     case ARM::VLD1LNq8Pseudo_UPD:
    4323             :     case ARM::VLD1LNq16Pseudo_UPD:
    4324             :     case ARM::VLD1LNq32Pseudo_UPD:
    4325             :     case ARM::VLD2LNd8Pseudo:
    4326             :     case ARM::VLD2LNd16Pseudo:
    4327             :     case ARM::VLD2LNd32Pseudo:
    4328             :     case ARM::VLD2LNq16Pseudo:
    4329             :     case ARM::VLD2LNq32Pseudo:
    4330             :     case ARM::VLD2LNd8Pseudo_UPD:
    4331             :     case ARM::VLD2LNd16Pseudo_UPD:
    4332             :     case ARM::VLD2LNd32Pseudo_UPD:
    4333             :     case ARM::VLD2LNq16Pseudo_UPD:
    4334             :     case ARM::VLD2LNq32Pseudo_UPD:
    4335             :     case ARM::VLD4LNd8Pseudo:
    4336             :     case ARM::VLD4LNd16Pseudo:
    4337             :     case ARM::VLD4LNd32Pseudo:
    4338             :     case ARM::VLD4LNq16Pseudo:
    4339             :     case ARM::VLD4LNq32Pseudo:
    4340             :     case ARM::VLD4LNd8Pseudo_UPD:
    4341             :     case ARM::VLD4LNd16Pseudo_UPD:
    4342             :     case ARM::VLD4LNd32Pseudo_UPD:
    4343             :     case ARM::VLD4LNq16Pseudo_UPD:
    4344             :     case ARM::VLD4LNq32Pseudo_UPD:
    4345             :       // If the address is not 64-bit aligned, the latencies of these
    4346             :       // instructions increases by one.
    4347          19 :       ++Latency;
    4348          19 :       break;
    4349             :     }
    4350             : 
    4351             :   return Latency;
    4352             : }
    4353             : 
    4354       39518 : unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
    4355       39518 :   if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
    4356             :       MI.isImplicitDef())
    4357             :     return 0;
    4358             : 
    4359       39507 :   if (MI.isBundle())
    4360             :     return 0;
    4361             : 
    4362             :   const MCInstrDesc &MCID = MI.getDesc();
    4363             : 
    4364       81025 :   if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
    4365        2071 :                         !Subtarget.cheapPredicableCPSRDef())) {
    4366             :     // When predicated, CPSR is an additional source operand for CPSR updating
    4367             :     // instructions, this apparently increases their latencies.
    4368             :     return 1;
    4369             :   }
    4370             :   return 0;
    4371             : }
    4372             : 
    4373      124244 : unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
    4374             :                                            const MachineInstr &MI,
    4375             :                                            unsigned *PredCost) const {
    4376      124233 :   if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
    4377             :       MI.isImplicitDef())
    4378             :     return 1;
    4379             : 
    4380             :   // An instruction scheduler typically runs on unbundled instructions, however
    4381             :   // other passes may query the latency of a bundled instruction.
    4382      124188 :   if (MI.isBundle()) {
    4383             :     unsigned Latency = 0;
    4384        1206 :     MachineBasicBlock::const_instr_iterator I = MI.getIterator();
    4385        1206 :     MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    4386        7519 :     while (++I != E && I->isInsideBundle()) {
    4387        5118 :       if (I->getOpcode() != ARM::t2IT)
    4388        1491 :         Latency += getInstrLatency(ItinData, *I, PredCost);
    4389             :     }
    4390             :     return Latency;
    4391             :   }
    4392             : 
    4393             :   const MCInstrDesc &MCID = MI.getDesc();
    4394      122982 :   if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
    4395           0 :                                      !Subtarget.cheapPredicableCPSRDef()))) {
    4396             :     // When predicated, CPSR is an additional source operand for CPSR updating
    4397             :     // instructions, this apparently increases their latencies.
    4398           0 :     *PredCost = 1;
    4399             :   }
    4400             :   // Be sure to call getStageLatency for an empty itinerary in case it has a
    4401             :   // valid MinLatency property.
    4402      122982 :   if (!ItinData)
    4403           0 :     return MI.mayLoad() ? 3 : 1;
    4404             : 
    4405      122982 :   unsigned Class = MCID.getSchedClass();
    4406             : 
    4407             :   // For instructions with variable uops, use uops as latency.
    4408      244186 :   if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
    4409        4351 :     return getNumMicroOps(ItinData, MI);
    4410             : 
    4411             :   // For the common case, fall back on the itinerary's latency.
    4412      118631 :   unsigned Latency = ItinData->getStageLatency(Class);
    4413             : 
    4414             :   // Adjust for dynamic def-side opcode variants not captured by the itinerary.
    4415             :   unsigned DefAlign =
    4416      145554 :       MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
    4417      118631 :   int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
    4418      118631 :   if (Adj >= 0 || (int)Latency > -Adj) {
    4419      118600 :     return Latency + Adj;
    4420             :   }
    4421             :   return Latency;
    4422             : }
    4423             : 
    4424       64826 : int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
    4425             :                                       SDNode *Node) const {
    4426       64826 :   if (!Node->isMachineOpcode())
    4427             :     return 1;
    4428             : 
    4429       64826 :   if (!ItinData || ItinData->isEmpty())
    4430             :     return 1;
    4431             : 
    4432             :   unsigned Opcode = Node->getMachineOpcode();
    4433       64826 :   switch (Opcode) {
    4434       64824 :   default:
    4435      194472 :     return ItinData->getStageLatency(get(Opcode).getSchedClass());
    4436             :   case ARM::VLDMQIA:
    4437             :   case ARM::VSTMQIA:
    4438             :     return 2;
    4439             :   }
    4440             : }
    4441             : 
    4442          88 : bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
    4443             :                                              const MachineRegisterInfo *MRI,
    4444             :                                              const MachineInstr &DefMI,
    4445             :                                              unsigned DefIdx,
    4446             :                                              const MachineInstr &UseMI,
    4447             :                                              unsigned UseIdx) const {
    4448          88 :   unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
    4449          88 :   unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
    4450         108 :   if (Subtarget.nonpipelinedVFP() &&
    4451          20 :       (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
    4452             :     return true;
    4453             : 
    4454             :   // Hoist VFP / NEON instructions with 4 or higher latency.
    4455             :   unsigned Latency =
    4456          88 :       SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
    4457          88 :   if (Latency <= 3)
    4458             :     return false;
    4459          19 :   return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
    4460          19 :          UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
    4461             : }
    4462             : 
    4463         442 : bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
    4464             :                                         const MachineInstr &DefMI,
    4465             :                                         unsigned DefIdx) const {
    4466             :   const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
    4467         426 :   if (!ItinData || ItinData->isEmpty())
    4468             :     return false;
    4469             : 
    4470         426 :   unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
    4471         426 :   if (DDomain == ARMII::DomainGeneral) {
    4472         384 :     unsigned DefClass = DefMI.getDesc().getSchedClass();
    4473             :     int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    4474         384 :     return (DefCycle != -1 && DefCycle <= 2);
    4475             :   }
    4476             :   return false;
    4477             : }
    4478             : 
    4479     1296144 : bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
    4480             :                                          StringRef &ErrInfo) const {
    4481     2592288 :   if (convertAddSubFlagsOpcode(MI.getOpcode())) {
    4482           0 :     ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
    4483           0 :     return false;
    4484             :   }
    4485             :   return true;
    4486             : }
    4487             : 
    4488             : // LoadStackGuard has so far only been implemented for MachO. Different code
    4489             : // sequence is needed for other targets.
    4490          50 : void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
    4491             :                                                 unsigned LoadImmOpc,
    4492             :                                                 unsigned LoadOpc) const {
    4493             :   assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
    4494             :          "ROPI/RWPI not currently supported with stack guard");
    4495             : 
    4496          50 :   MachineBasicBlock &MBB = *MI->getParent();
    4497             :   DebugLoc DL = MI->getDebugLoc();
    4498          50 :   unsigned Reg = MI->getOperand(0).getReg();
    4499             :   const GlobalValue *GV =
    4500          50 :       cast<GlobalValue>((*MI->memoperands_begin())->getValue());
    4501          50 :   MachineInstrBuilder MIB;
    4502             : 
    4503         150 :   BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
    4504             :       .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
    4505             : 
    4506          50 :   if (Subtarget.isGVIndirectSymbol(GV)) {
    4507          84 :     MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
    4508          42 :     MIB.addReg(Reg, RegState::Kill).addImm(0);
    4509             :     auto Flags = MachineMemOperand::MOLoad |
    4510             :                  MachineMemOperand::MODereferenceable |
    4511             :                  MachineMemOperand::MOInvariant;
    4512         126 :     MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
    4513          42 :         MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
    4514          42 :     MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
    4515             :   }
    4516             : 
    4517         100 :   MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
    4518          50 :   MIB.addReg(Reg, RegState::Kill)
    4519             :      .addImm(0)
    4520          50 :      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
    4521          50 :      .add(predOps(ARMCC::AL));
    4522          50 : }
    4523             : 
    4524             : bool
    4525         572 : ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
    4526             :                                      unsigned &AddSubOpc,
    4527             :                                      bool &NegAcc, bool &HasLane) const {
    4528         572 :   DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
    4529         572 :   if (I == MLxEntryMap.end())
    4530             :     return false;
    4531             : 
    4532          14 :   const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
    4533          14 :   MulOpc = Entry.MulOpc;
    4534          14 :   AddSubOpc = Entry.AddSubOpc;
    4535          14 :   NegAcc = Entry.NegAcc;
    4536          14 :   HasLane = Entry.HasLane;
    4537          14 :   return true;
    4538             : }
    4539             : 
    4540             : //===----------------------------------------------------------------------===//
    4541             : // Execution domains.
    4542             : //===----------------------------------------------------------------------===//
    4543             : //
    4544             : // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
    4545             : // and some can go down both.  The vmov instructions go down the VFP pipeline,
    4546             : // but they can be changed to vorr equivalents that are executed by the NEON
    4547             : // pipeline.
    4548             : //
    4549             : // We use the following execution domain numbering:
    4550             : //
    4551             : enum ARMExeDomain {
    4552             :   ExeGeneric = 0,
    4553             :   ExeVFP = 1,
    4554             :   ExeNEON = 2
    4555             : };
    4556             : 
    4557             : //
    4558             : // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
    4559             : //
    4560             : std::pair<uint16_t, uint16_t>
    4561       95407 : ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
    4562             :   // If we don't have access to NEON instructions then we won't be able
    4563             :   // to swizzle anything to the NEON domain. Check to make sure.
    4564       95407 :   if (Subtarget.hasNEON()) {
    4565             :     // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
    4566             :     // if they are not predicated.
    4567      113422 :     if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
    4568         298 :       return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
    4569             : 
    4570             :     // CortexA9 is particularly picky about mixing the two and wants these
    4571             :     // converted.
    4572       57897 :     if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
    4573        2910 :         (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
    4574             :          MI.getOpcode() == ARM::VMOVS))
    4575         149 :       return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
    4576             :   }
    4577             :   // No other instructions can be swizzled, so just determine their domain.
    4578       94960 :   unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
    4579             : 
    4580       94960 :   if (Domain & ARMII::DomainNEON)
    4581       17050 :     return std::make_pair(ExeNEON, 0);
    4582             : 
    4583             :   // Certain instructions can go either way on Cortex-A8.
    4584             :   // Treat them as NEON instructions.
    4585       77910 :   if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
    4586         123 :     return std::make_pair(ExeNEON, 0);
    4587             : 
    4588       77787 :   if (Domain & ARMII::DomainVFP)
    4589        4845 :     return std::make_pair(ExeVFP, 0);
    4590             : 
    4591       72942 :   return std::make_pair(ExeGeneric, 0);
    4592             : }
    4593             : 
    4594          69 : static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
    4595             :                                             unsigned SReg, unsigned &Lane) {
    4596          69 :   unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
    4597          69 :   Lane = 0;
    4598             : 
    4599          69 :   if (DReg != ARM::NoRegister)
    4600             :    return DReg;
    4601             : 
    4602          24 :   Lane = 1;
    4603          24 :   DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
    4604             : 
    4605             :   assert(DReg && "S-register with no D super-register?");
    4606          24 :   return DReg;
    4607             : }
    4608             : 
    4609             : /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
    4610             : /// set ImplicitSReg to a register number that must be marked as implicit-use or
    4611             : /// zero if no register needs to be defined as implicit-use.
    4612             : ///
    4613             : /// If the function cannot determine if an SPR should be marked implicit use or
    4614             : /// not, it returns false.
    4615             : ///
    4616             : /// This function handles cases where an instruction is being modified from taking
    4617             : /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
    4618             : /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
    4619             : /// lane of the DPR).
    4620             : ///
    4621             : /// If the other SPR is defined, an implicit-use of it should be added. Else,
    4622             : /// (including the case where the DPR itself is defined), it should not.
    4623             : ///
    4624          21 : static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
    4625             :                                        MachineInstr &MI, unsigned DReg,
    4626             :                                        unsigned Lane, unsigned &ImplicitSReg) {
    4627             :   // If the DPR is defined or used already, the other SPR lane will be chained
    4628             :   // correctly, so there is nothing to be done.
    4629          31 :   if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
    4630          13 :     ImplicitSReg = 0;
    4631          13 :     return true;
    4632             :   }
    4633             : 
    4634             :   // Otherwise we need to go searching to see if the SPR is set explicitly.
    4635          16 :   ImplicitSReg = TRI->getSubReg(DReg,
    4636           8 :                                 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
    4637             :   MachineBasicBlock::LivenessQueryResult LQR =
    4638          16 :       MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
    4639             : 
    4640           8 :   if (LQR == MachineBasicBlock::LQR_Live)
    4641             :     return true;
    4642           8 :   else if (LQR == MachineBasicBlock::LQR_Unknown)
    4643             :     return false;
    4644             : 
    4645             :   // If the register is known not to be live, there is no need to add an
    4646             :   // implicit-use.
    4647           8 :   ImplicitSReg = 0;
    4648           8 :   return true;
    4649             : }
    4650             : 
    4651         447 : void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
    4652             :                                           unsigned Domain) const {
    4653             :   unsigned DstReg, SrcReg, DReg;
    4654             :   unsigned Lane;
    4655         447 :   MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    4656         447 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    4657         894 :   switch (MI.getOpcode()) {
    4658           0 :   default:
    4659           0 :     llvm_unreachable("cannot handle opcode!");
    4660             :     break;
    4661         298 :   case ARM::VMOVD:
    4662         298 :     if (Domain != ExeNEON)
    4663             :       break;
    4664             : 
    4665             :     // Zap the predicate operands.
    4666             :     assert(!isPredicated(MI) && "Cannot predicate a VORRd");
    4667             : 
    4668             :     // Make sure we've got NEON instructions.
    4669             :     assert(Subtarget.hasNEON() && "VORRd requires NEON");
    4670             : 
    4671             :     // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
    4672         179 :     DstReg = MI.getOperand(0).getReg();
    4673         179 :     SrcReg = MI.getOperand(1).getReg();
    4674             : 
    4675        1790 :     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
    4676         895 :       MI.RemoveOperand(i - 1);
    4677             : 
    4678             :     // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
    4679         179 :     MI.setDesc(get(ARM::VORRd));
    4680         179 :     MIB.addReg(DstReg, RegState::Define)
    4681         179 :         .addReg(SrcReg)
    4682         179 :         .addReg(SrcReg)
    4683         179 :         .add(predOps(ARMCC::AL));
    4684         179 :     break;
    4685          58 :   case ARM::VMOVRS:
    4686          58 :     if (Domain != ExeNEON)
    4687             :       break;
    4688             :     assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
    4689             : 
    4690             :     // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
    4691          37 :     DstReg = MI.getOperand(0).getReg();
    4692          37 :     SrcReg = MI.getOperand(1).getReg();
    4693             : 
    4694         370 :     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
    4695         185 :       MI.RemoveOperand(i - 1);
    4696             : 
    4697          37 :     DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
    4698             : 
    4699             :     // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
    4700             :     // Note that DSrc has been widened and the other lane may be undef, which
    4701             :     // contaminates the entire register.
    4702          37 :     MI.setDesc(get(ARM::VGETLNi32));
    4703          37 :     MIB.addReg(DstReg, RegState::Define)
    4704          37 :         .addReg(DReg, RegState::Undef)
    4705          37 :         .addImm(Lane)
    4706          37 :         .add(predOps(ARMCC::AL));
    4707             : 
    4708             :     // The old source should be an implicit use, otherwise we might think it
    4709             :     // was dead before here.
    4710          37 :     MIB.addReg(SrcReg, RegState::Implicit);
    4711          37 :     break;
    4712          77 :   case ARM::VMOVSR: {
    4713          77 :     if (Domain != ExeNEON)
    4714             :       break;
    4715             :     assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
    4716             : 
    4717             :     // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
    4718          10 :     DstReg = MI.getOperand(0).getReg();
    4719          10 :     SrcReg = MI.getOperand(1).getReg();
    4720             : 
    4721          10 :     DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
    4722             : 
    4723             :     unsigned ImplicitSReg;
    4724          10 :     if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
    4725             :       break;
    4726             : 
    4727         100 :     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
    4728          40 :       MI.RemoveOperand(i - 1);
    4729             : 
    4730             :     // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
    4731             :     // Again DDst may be undefined at the beginning of this instruction.
    4732          10 :     MI.setDesc(get(ARM::VSETLNi32));
    4733          10 :     MIB.addReg(DReg, RegState::Define)
    4734          10 :         .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
    4735          10 :         .addReg(SrcReg)
    4736          10 :         .addImm(Lane)
    4737          10 :         .add(predOps(ARMCC::AL));
    4738             : 
    4739             :     // The narrower destination must be marked as set to keep previous chains
    4740             :     // in place.
    4741          10 :     MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
    4742          10 :     if (ImplicitSReg != 0)
    4743           0 :       MIB.addReg(ImplicitSReg, RegState::Implicit);
    4744             :     break;
    4745             :     }
    4746          14 :     case ARM::VMOVS: {
    4747          14 :       if (Domain != ExeNEON)
    4748             :         break;
    4749             : 
    4750             :       // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
    4751          11 :       DstReg = MI.getOperand(0).getReg();
    4752          11 :       SrcReg = MI.getOperand(1).getReg();
    4753             : 
    4754          11 :       unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
    4755          11 :       DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
    4756          11 :       DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
    4757             : 
    4758             :       unsigned ImplicitSReg;
    4759          11 :       if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
    4760             :         break;
    4761             : 
    4762         110 :       for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
    4763          44 :         MI.RemoveOperand(i - 1);
    4764             : 
    4765          11 :       if (DSrc == DDst) {
    4766             :         // Destination can be:
    4767             :         //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
    4768           1 :         MI.setDesc(get(ARM::VDUPLN32d));
    4769           1 :         MIB.addReg(DDst, RegState::Define)
    4770           1 :             .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
    4771           1 :             .addImm(SrcLane)
    4772           1 :             .add(predOps(ARMCC::AL));
    4773             : 
    4774             :         // Neither the source or the destination are naturally represented any
    4775             :         // more, so add them in manually.
    4776           1 :         MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
    4777           1 :         MIB.addReg(SrcReg, RegState::Implicit);
    4778           1 :         if (ImplicitSReg != 0)
    4779           0 :           MIB.addReg(ImplicitSReg, RegState::Implicit);
    4780             :         break;
    4781             :       }
    4782             : 
    4783             :       // In general there's no single instruction that can perform an S <-> S
    4784             :       // move in NEON space, but a pair of VEXT instructions *can* do the
    4785             :       // job. It turns out that the VEXTs needed will only use DSrc once, with
    4786             :       // the position based purely on the combination of lane-0 and lane-1
    4787             :       // involved. For example
    4788             :       //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
    4789             :       //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
    4790             :       //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
    4791             :       //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
    4792             :       //
    4793             :       // Pattern of the MachineInstrs is:
    4794             :       //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
    4795          10 :       MachineInstrBuilder NewMIB;
    4796          10 :       NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
    4797          10 :                        DDst);
    4798             : 
    4799             :       // On the first instruction, both DSrc and DDst may be undef if present.
    4800             :       // Specifically when the original instruction didn't have them as an
    4801             :       // <imp-use>.
    4802          10 :       unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
    4803             :       bool CurUndef = !MI.readsRegister(CurReg, TRI);
    4804          10 :       NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
    4805             : 
    4806          10 :       CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
    4807             :       CurUndef = !MI.readsRegister(CurReg, TRI);
    4808          10 :       NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
    4809             :             .addImm(1)
    4810          10 :             .add(predOps(ARMCC::AL));
    4811             : 
    4812          10 :       if (SrcLane == DstLane)
    4813           7 :         NewMIB.addReg(SrcReg, RegState::Implicit);
    4814             : 
    4815          10 :       MI.setDesc(get(ARM::VEXTd32));
    4816          10 :       MIB.addReg(DDst, RegState::Define);
    4817             : 
    4818             :       // On the second instruction, DDst has definitely been defined above, so
    4819             :       // it is not undef. DSrc, if present, can be undef as above.
    4820          10 :       CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
    4821          11 :       CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    4822          10 :       MIB.addReg(CurReg, getUndefRegState(CurUndef));
    4823             : 
    4824          10 :       CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
    4825          12 :       CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    4826          10 :       MIB.addReg(CurReg, getUndefRegState(CurUndef))
    4827             :          .addImm(1)
    4828          10 :          .add(predOps(ARMCC::AL));
    4829             : 
    4830          10 :       if (SrcLane != DstLane)
    4831           3 :         MIB.addReg(SrcReg, RegState::Implicit);
    4832             : 
    4833             :       // As before, the original destination is no longer represented, add it
    4834             :       // implicitly.
    4835          10 :       MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
    4836          10 :       if (ImplicitSReg != 0)
    4837           0 :         MIB.addReg(ImplicitSReg, RegState::Implicit);
    4838             :       break;
    4839             :     }
    4840             :   }
    4841         447 : }
    4842             : 
    4843             : //===----------------------------------------------------------------------===//
    4844             : // Partial register updates
    4845             : //===----------------------------------------------------------------------===//
    4846             : //
    4847             : // Swift renames NEON registers with 64-bit granularity.  That means any
    4848             : // instruction writing an S-reg implicitly reads the containing D-reg.  The
    4849             : // problem is mostly avoided by translating f32 operations to v2f32 operations
    4850             : // on D-registers, but f32 loads are still a problem.
    4851             : //
    4852             : // These instructions can load an f32 into a NEON register:
    4853             : //
    4854             : // VLDRS - Only writes S, partial D update.
    4855             : // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
    4856             : // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
    4857             : //
    4858             : // FCONSTD can be used as a dependency-breaking instruction.
    4859       98062 : unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
    4860             :     const MachineInstr &MI, unsigned OpNum,
    4861             :     const TargetRegisterInfo *TRI) const {
    4862       98062 :   auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
    4863       98062 :   if (!PartialUpdateClearance)
    4864             :     return 0;
    4865             : 
    4866             :   assert(TRI && "Need TRI instance");
    4867             : 
    4868        2960 :   const MachineOperand &MO = MI.getOperand(OpNum);
    4869             :   if (MO.readsReg())
    4870             :     return 0;
    4871        2960 :   unsigned Reg = MO.getReg();
    4872             :   int UseOp = -1;
    4873             : 
    4874        5920 :   switch (MI.getOpcode()) {
    4875             :   // Normal instructions writing only an S-register.
    4876          47 :   case ARM::VLDRS:
    4877             :   case ARM::FCONSTS:
    4878             :   case ARM::VMOVSR:
    4879             :   case ARM::VMOVv8i8:
    4880             :   case ARM::VMOVv4i16:
    4881             :   case ARM::VMOVv2i32:
    4882             :   case ARM::VMOVv2f32:
    4883             :   case ARM::VMOVv1i64:
    4884          47 :     UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
    4885             :     break;
    4886             : 
    4887             :     // Explicitly reads the dependency.
    4888             :   case ARM::VLD1LNd32:
    4889             :     UseOp = 3;
    4890             :     break;
    4891             :   default:
    4892             :     return 0;
    4893             :   }
    4894             : 
    4895             :   // If this instruction actually reads a value from Reg, there is no unwanted
    4896             :   // dependency.
    4897          47 :   if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
    4898             :     return 0;
    4899             : 
    4900             :   // We must be able to clobber the whole D-reg.
    4901          48 :   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    4902             :     // Virtual register must be a def undef foo:ssub_0 operand.
    4903           0 :     if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
    4904             :       return 0;
    4905          96 :   } else if (ARM::SPRRegClass.contains(Reg)) {
    4906             :     // Physical register: MI must define the full D-reg.
    4907             :     unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
    4908          27 :                                              &ARM::DPRRegClass);
    4909          49 :     if (!DReg || !MI.definesRegister(DReg, TRI))
    4910             :       return 0;
    4911             :   }
    4912             : 
    4913             :   // MI has an unwanted D-register dependency.
    4914             :   // Avoid defs in the previous N instructrions.
    4915             :   return PartialUpdateClearance;
    4916             : }
    4917             : 
    4918             : // Break a partial register dependency after getPartialRegUpdateClearance
    4919             : // returned non-zero.
    4920           3 : void ARMBaseInstrInfo::breakPartialRegDependency(
    4921             :     MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
    4922             :   assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
    4923             :   assert(TRI && "Need TRI instance");
    4924             : 
    4925           3 :   const MachineOperand &MO = MI.getOperand(OpNum);
    4926           3 :   unsigned Reg = MO.getReg();
    4927             :   assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
    4928             :          "Can't break virtual register dependencies.");
    4929             :   unsigned DReg = Reg;
    4930             : 
    4931             :   // If MI defines an S-reg, find the corresponding D super-register.
    4932           6 :   if (ARM::SPRRegClass.contains(Reg)) {
    4933           0 :     DReg = ARM::D0 + (Reg - ARM::S0) / 2;
    4934             :     assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
    4935             :   }
    4936             : 
    4937             :   assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
    4938             :   assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
    4939             : 
    4940             :   // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
    4941             :   // the full D-register by loading the same value to both lanes.  The
    4942             :   // instruction is micro-coded with 2 uops, so don't do this until we can
    4943             :   // properly schedule micro-coded instructions.  The dispatcher stalls cause
    4944             :   // too big regressions.
    4945             : 
    4946             :   // Insert the dependency-breaking FCONSTD before MI.
    4947             :   // 96 is the encoding of 0.5, but the actual value doesn't matter here.
    4948           6 :   BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
    4949             :       .addImm(96)
    4950           3 :       .add(predOps(ARMCC::AL));
    4951           3 :   MI.addRegisterKilled(DReg, TRI, true);
    4952           3 : }
    4953             : 
    4954          16 : bool ARMBaseInstrInfo::hasNOP() const {
    4955          32 :   return Subtarget.getFeatureBits()[ARM::HasV6KOps];
    4956             : }
    4957             : 
    4958         110 : bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
    4959         110 :   if (MI->getNumOperands() < 4)
    4960             :     return true;
    4961         110 :   unsigned ShOpVal = MI->getOperand(3).getImm();
    4962             :   unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
    4963             :   // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
    4964         220 :   if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
    4965         130 :       ((ShImm == 1 || ShImm == 2) &&
    4966             :        ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
    4967             :     return true;
    4968             : 
    4969             :   return false;
    4970             : }
    4971             : 
    4972        1326 : bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
    4973             :     const MachineInstr &MI, unsigned DefIdx,
    4974             :     SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
    4975             :   assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
    4976             :   assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
    4977             : 
    4978        2652 :   switch (MI.getOpcode()) {
    4979        1326 :   case ARM::VMOVDRR:
    4980             :     // dX = VMOVDRR rY, rZ
    4981             :     // is the same as:
    4982             :     // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
    4983             :     // Populate the InputRegs accordingly.
    4984             :     // rY
    4985        1326 :     const MachineOperand *MOReg = &MI.getOperand(1);
    4986        1326 :     if (!MOReg->isUndef())
    4987        3978 :       InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
    4988             :                                               MOReg->getSubReg(), ARM::ssub_0));
    4989             :     // rZ
    4990        1326 :     MOReg = &MI.getOperand(2);
    4991        1326 :     if (!MOReg->isUndef())
    4992        3978 :       InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
    4993             :                                               MOReg->getSubReg(), ARM::ssub_1));
    4994        1326 :     return true;
    4995             :   }
    4996           0 :   llvm_unreachable("Target dependent opcode missing");
    4997             : }
    4998             : 
    4999        2915 : bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
    5000             :     const MachineInstr &MI, unsigned DefIdx,
    5001             :     RegSubRegPairAndIdx &InputReg) const {
    5002             :   assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
    5003             :   assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
    5004             : 
    5005        5830 :   switch (MI.getOpcode()) {
    5006        2915 :   case ARM::VMOVRRD:
    5007             :     // rX, rY = VMOVRRD dZ
    5008             :     // is the same as:
    5009             :     // rX = EXTRACT_SUBREG dZ, ssub_0
    5010             :     // rY = EXTRACT_SUBREG dZ, ssub_1
    5011        2915 :     const MachineOperand &MOReg = MI.getOperand(2);
    5012        2915 :     if (MOReg.isUndef())
    5013             :       return false;
    5014        2915 :     InputReg.Reg = MOReg.getReg();
    5015        2915 :     InputReg.SubReg = MOReg.getSubReg();
    5016        2915 :     InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
    5017        2915 :     return true;
    5018             :   }
    5019           0 :   llvm_unreachable("Target dependent opcode missing");
    5020             : }
    5021             : 
    5022         193 : bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
    5023             :     const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
    5024             :     RegSubRegPairAndIdx &InsertedReg) const {
    5025             :   assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
    5026             :   assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
    5027             : 
    5028         386 :   switch (MI.getOpcode()) {
    5029         193 :   case ARM::VSETLNi32:
    5030             :     // dX = VSETLNi32 dY, rZ, imm
    5031         193 :     const MachineOperand &MOBaseReg = MI.getOperand(1);
    5032             :     const MachineOperand &MOInsertedReg = MI.getOperand(2);
    5033         193 :     if (MOInsertedReg.isUndef())
    5034             :       return false;
    5035             :     const MachineOperand &MOIndex = MI.getOperand(3);
    5036         193 :     BaseReg.Reg = MOBaseReg.getReg();
    5037         193 :     BaseReg.SubReg = MOBaseReg.getSubReg();
    5038             : 
    5039         193 :     InsertedReg.Reg = MOInsertedReg.getReg();
    5040         193 :     InsertedReg.SubReg = MOInsertedReg.getSubReg();
    5041         193 :     InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
    5042         193 :     return true;
    5043             :   }
    5044           0 :   llvm_unreachable("Target dependent opcode missing");
    5045      303507 : }

Generated by: LCOV version 1.13