LCOV - code coverage report
Current view: top level - lib/Target/ARM - ARMLoadStoreOptimizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 835 925 90.3 %
Date: 2018-07-13 00:08:38 Functions: 47 48 97.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file This file contains a pass that performs load / store related peephole
      11             : /// optimizations. This pass should be run after register allocation.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "ARM.h"
      16             : #include "ARMBaseInstrInfo.h"
      17             : #include "ARMBaseRegisterInfo.h"
      18             : #include "ARMISelLowering.h"
      19             : #include "ARMMachineFunctionInfo.h"
      20             : #include "ARMSubtarget.h"
      21             : #include "MCTargetDesc/ARMAddressingModes.h"
      22             : #include "MCTargetDesc/ARMBaseInfo.h"
      23             : #include "Utils/ARMBaseInfo.h"
      24             : #include "llvm/ADT/ArrayRef.h"
      25             : #include "llvm/ADT/DenseMap.h"
      26             : #include "llvm/ADT/DenseSet.h"
      27             : #include "llvm/ADT/STLExtras.h"
      28             : #include "llvm/ADT/SmallPtrSet.h"
      29             : #include "llvm/ADT/SmallSet.h"
      30             : #include "llvm/ADT/SmallVector.h"
      31             : #include "llvm/ADT/Statistic.h"
      32             : #include "llvm/ADT/iterator_range.h"
      33             : #include "llvm/Analysis/AliasAnalysis.h"
      34             : #include "llvm/CodeGen/LivePhysRegs.h"
      35             : #include "llvm/CodeGen/MachineBasicBlock.h"
      36             : #include "llvm/CodeGen/MachineFunction.h"
      37             : #include "llvm/CodeGen/MachineFunctionPass.h"
      38             : #include "llvm/CodeGen/MachineInstr.h"
      39             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      40             : #include "llvm/CodeGen/MachineMemOperand.h"
      41             : #include "llvm/CodeGen/MachineOperand.h"
      42             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      43             : #include "llvm/CodeGen/RegisterClassInfo.h"
      44             : #include "llvm/CodeGen/TargetFrameLowering.h"
      45             : #include "llvm/CodeGen/TargetInstrInfo.h"
      46             : #include "llvm/CodeGen/TargetLowering.h"
      47             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      48             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      49             : #include "llvm/IR/DataLayout.h"
      50             : #include "llvm/IR/DebugLoc.h"
      51             : #include "llvm/IR/DerivedTypes.h"
      52             : #include "llvm/IR/Function.h"
      53             : #include "llvm/IR/Type.h"
      54             : #include "llvm/MC/MCInstrDesc.h"
      55             : #include "llvm/Pass.h"
      56             : #include "llvm/Support/Allocator.h"
      57             : #include "llvm/Support/CommandLine.h"
      58             : #include "llvm/Support/Debug.h"
      59             : #include "llvm/Support/ErrorHandling.h"
      60             : #include "llvm/Support/raw_ostream.h"
      61             : #include <algorithm>
      62             : #include <cassert>
      63             : #include <cstddef>
      64             : #include <cstdlib>
      65             : #include <iterator>
      66             : #include <limits>
      67             : #include <utility>
      68             : 
      69             : using namespace llvm;
      70             : 
      71             : #define DEBUG_TYPE "arm-ldst-opt"
      72             : 
      // Counters for the transformations this pass performs. Note that the
      // Num*Gened counters are incremented inside getLoadStoreMultipleOpcode()
      // before the addressing sub-mode is inspected, i.e. also on the paths
      // where that function returns 0 because no matching opcode exists.
      73             : STATISTIC(NumLDMGened , "Number of ldm instructions generated");
      74             : STATISTIC(NumSTMGened , "Number of stm instructions generated");
      75             : STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
      76             : STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
      77             : STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
      78             : STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
      79             : STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
      80             : STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
      81             : STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
      82             : STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
      83             : STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
      84             : 
      85             : /// This switch disables formation of double/multi instructions that could
      86             : /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
      87             : /// disabled. This can be used to create libraries that are robust even when
      88             : /// users provoke undefined behaviour by supplying misaligned pointers.
      89             : /// Hidden option, initialised to false. \see mayCombineMisaligned()
      90             : static cl::opt<bool>
      91       99743 : AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
      92       99743 :     cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
      93             : 
      94             : #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
      95             : 
      96             : namespace {
      97             : 
      98             :   /// Post- register allocation pass the combine load / store instructions to
      99             :   /// form ldm / stm instructions.
     100       12265 :   struct ARMLoadStoreOpt : public MachineFunctionPass {
     101             :     static char ID;
     102             : 
     103             :     const MachineFunction *MF;
     104             :     const TargetInstrInfo *TII;
     105             :     const TargetRegisterInfo *TRI;
     106             :     const ARMSubtarget *STI;
     107             :     const TargetLowering *TL;
     108             :     ARMFunctionInfo *AFI;
     109             :     LivePhysRegs LiveRegs;
     110             :     RegisterClassInfo RegClassInfo;
     111             :     MachineBasicBlock::const_iterator LiveRegPos;
     112             :     bool LiveRegsValid;
     113             :     bool RegClassInfoValid;
     114             :     bool isThumb1, isThumb2;
     115             : 
     116        7440 :     ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
     117             : 
     118             :     bool runOnMachineFunction(MachineFunction &Fn) override;
     119             : 
     120        2469 :     MachineFunctionProperties getRequiredProperties() const override {
     121        4938 :       return MachineFunctionProperties().set(
     122        2469 :           MachineFunctionProperties::Property::NoVRegs);
     123             :     }
     124             : 
     125        2468 :     StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
     126             : 
     127             :   private:
     128             :     /// A set of load/store MachineInstrs with same base register sorted by
     129             :     /// offset.
     130             :     struct MemOpQueueEntry {
     131             :       MachineInstr *MI;
     132             :       int Offset;        ///< Load/Store offset.
     133             :       unsigned Position; ///< Position as counted from end of basic block.
     134             : 
     135             :       MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
     136       11900 :           : MI(&MI), Offset(Offset), Position(Position) {}
     137             :     };
     138             :     using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
     139             : 
     140             :     /// A set of MachineInstrs that fulfill (nearly all) conditions to get
     141             :     /// merged into a LDM/STM.
     142        9784 :     struct MergeCandidate {
     143             :       /// List of instructions ordered by load/store offset.
     144             :       SmallVector<MachineInstr*, 4> Instrs;
     145             : 
     146             :       /// Index in Instrs of the instruction being latest in the schedule.
     147             :       unsigned LatestMIIdx;
     148             : 
     149             :       /// Index in Instrs of the instruction being earliest in the schedule.
     150             :       unsigned EarliestMIIdx;
     151             : 
     152             :       /// Index into the basic block where the merged instruction will be
     153             :       /// inserted. (See MemOpQueueEntry.Position)
     154             :       unsigned InsertPos;
     155             : 
     156             :       /// Whether the instructions can be merged into a ldm/stm instruction.
     157             :       bool CanMergeToLSMulti;
     158             : 
     159             :       /// Whether the instructions can be merged into a ldrd/strd instruction.
     160             :       bool CanMergeToLSDouble;
     161             :     };
     162             :     SpecificBumpPtrAllocator<MergeCandidate> Allocator;
     163             :     SmallVector<const MergeCandidate*,4> Candidates;
     164             :     SmallVector<MachineInstr*,4> MergeBaseCandidates;
     165             : 
     166             :     void moveLiveRegsBefore(const MachineBasicBlock &MBB,
     167             :                             MachineBasicBlock::const_iterator Before);
     168             :     unsigned findFreeReg(const TargetRegisterClass &RegClass);
     169             :     void UpdateBaseRegUses(MachineBasicBlock &MBB,
     170             :                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
     171             :                            unsigned Base, unsigned WordOffset,
     172             :                            ARMCC::CondCodes Pred, unsigned PredReg);
     173             :     MachineInstr *CreateLoadStoreMulti(
     174             :         MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     175             :         int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
     176             :         ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
     177             :         ArrayRef<std::pair<unsigned, bool>> Regs);
     178             :     MachineInstr *CreateLoadStoreDouble(
     179             :         MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     180             :         int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
     181             :         ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
     182             :         ArrayRef<std::pair<unsigned, bool>> Regs) const;
     183             :     void FormCandidates(const MemOpQueue &MemOps);
     184             :     MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
     185             :     bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
     186             :                              MachineBasicBlock::iterator &MBBI);
     187             :     bool MergeBaseUpdateLoadStore(MachineInstr *MI);
     188             :     bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
     189             :     bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
     190             :     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
     191             :     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
     192             :     bool CombineMovBx(MachineBasicBlock &MBB);
     193             :   };
     194             : 
     195             : } // end anonymous namespace
     196             : 
     // Out-of-line definition of the static ID member declared in
     // ARMLoadStoreOpt; the pass constructor forwards it to MachineFunctionPass.
     197             : char ARMLoadStoreOpt::ID = 0;
     198             : 
     // Associates the pass class with the "arm-ldst-opt" argument string and the
     // ARM_LOAD_STORE_OPT_NAME description.
     // NOTE(review): the two trailing `false` arguments are presumably the
     // "CFG only" and "is analysis" flags - confirm against the macro definition.
     199      342570 : INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
     200             :                 false)
     201             : 
     202          85 : static bool definesCPSR(const MachineInstr &MI) {
     203        1093 :   for (const auto &MO : MI.operands()) {
     204         504 :     if (!MO.isReg())
     205         170 :       continue;
     206         334 :     if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
     207             :       // If the instruction has live CPSR def, then it's not safe to fold it
     208             :       // into load / store.
     209             :       return true;
     210             :   }
     211             : 
     212             :   return false;
     213             : }
     214             : 
     215       50547 : static int getMemoryOpOffset(const MachineInstr &MI) {
     216       50547 :   unsigned Opcode = MI.getOpcode();
     217       50547 :   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
     218       50547 :   unsigned NumOperands = MI.getDesc().getNumOperands();
     219      101094 :   unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
     220             : 
     221      101094 :   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
     222      137835 :       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
     223      130914 :       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
     224       81006 :       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
     225       39550 :     return OffField;
     226             : 
     227             :   // Thumb1 immediate offsets are scaled by 4
     228       10997 :   if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
     229        8519 :       Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
     230        3872 :     return OffField * 4;
     231             : 
     232       14250 :   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
     233        7110 :     : ARM_AM::getAM5Offset(OffField) * 4;
     234        7125 :   ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
     235             :     : ARM_AM::getAM5Op(OffField);
     236             : 
     237             :   if (Op == ARM_AM::sub)
     238          48 :     return -Offset;
     239             : 
     240             :   return Offset;
     241             : }
     242             : 
     243             : static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
     244        9055 :   return MI.getOperand(1);
     245             : }
     246             : 
     247             : static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
     248       15667 :   return MI.getOperand(0);
     249             : }
     250             : 
     251        3479 : static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
     252        3479 :   switch (Opcode) {
     253           0 :   default: llvm_unreachable("Unhandled opcode!");
     254             :   case ARM::LDRi12:
     255             :     ++NumLDMGened;
     256         762 :     switch (Mode) {
     257           0 :     default: llvm_unreachable("Unhandled submode!");
     258             :     case ARM_AM::ia: return ARM::LDMIA;
     259           0 :     case ARM_AM::da: return ARM::LDMDA;
     260           0 :     case ARM_AM::db: return ARM::LDMDB;
     261          29 :     case ARM_AM::ib: return ARM::LDMIB;
     262             :     }
     263             :   case ARM::STRi12:
     264             :     ++NumSTMGened;
     265        1128 :     switch (Mode) {
     266           0 :     default: llvm_unreachable("Unhandled submode!");
     267             :     case ARM_AM::ia: return ARM::STMIA;
     268           0 :     case ARM_AM::da: return ARM::STMDA;
     269           0 :     case ARM_AM::db: return ARM::STMDB;
     270          24 :     case ARM_AM::ib: return ARM::STMIB;
     271             :     }
     272             :   case ARM::tLDRi:
     273             :   case ARM::tLDRspi:
     274             :     // tLDMIA is writeback-only - unless the base register is in the input
     275             :     // reglist.
     276             :     ++NumLDMGened;
     277         337 :     switch (Mode) {
     278           0 :     default: llvm_unreachable("Unhandled submode!");
     279             :     case ARM_AM::ia: return ARM::tLDMIA;
     280             :     }
     281             :   case ARM::tSTRi:
     282             :   case ARM::tSTRspi:
     283             :     // There is no non-writeback tSTMIA either.
     284             :     ++NumSTMGened;
     285         151 :     switch (Mode) {
     286           0 :     default: llvm_unreachable("Unhandled submode!");
     287             :     case ARM_AM::ia: return ARM::tSTMIA_UPD;
     288             :     }
     289             :   case ARM::t2LDRi8:
     290             :   case ARM::t2LDRi12:
     291             :     ++NumLDMGened;
     292         355 :     switch (Mode) {
     293           0 :     default: llvm_unreachable("Unhandled submode!");
     294             :     case ARM_AM::ia: return ARM::t2LDMIA;
     295           0 :     case ARM_AM::db: return ARM::t2LDMDB;
     296             :     }
     297             :   case ARM::t2STRi8:
     298             :   case ARM::t2STRi12:
     299             :     ++NumSTMGened;
     300         399 :     switch (Mode) {
     301           0 :     default: llvm_unreachable("Unhandled submode!");
     302             :     case ARM_AM::ia: return ARM::t2STMIA;
     303           0 :     case ARM_AM::db: return ARM::t2STMDB;
     304             :     }
     305             :   case ARM::VLDRS:
     306             :     ++NumVLDMGened;
     307          41 :     switch (Mode) {
     308           0 :     default: llvm_unreachable("Unhandled submode!");
     309             :     case ARM_AM::ia: return ARM::VLDMSIA;
     310           0 :     case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
     311             :     }
     312             :   case ARM::VSTRS:
     313             :     ++NumVSTMGened;
     314           0 :     switch (Mode) {
     315           0 :     default: llvm_unreachable("Unhandled submode!");
     316             :     case ARM_AM::ia: return ARM::VSTMSIA;
     317           0 :     case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
     318             :     }
     319             :   case ARM::VLDRD:
     320             :     ++NumVLDMGened;
     321         210 :     switch (Mode) {
     322           0 :     default: llvm_unreachable("Unhandled submode!");
     323             :     case ARM_AM::ia: return ARM::VLDMDIA;
     324           0 :     case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
     325             :     }
     326             :   case ARM::VSTRD:
     327             :     ++NumVSTMGened;
     328          96 :     switch (Mode) {
     329           0 :     default: llvm_unreachable("Unhandled submode!");
     330             :     case ARM_AM::ia: return ARM::VSTMDIA;
     331           0 :     case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
     332             :     }
     333             :   }
     334             : }
     335             : 
     336         258 : static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
     337         258 :   switch (Opcode) {
     338           0 :   default: llvm_unreachable("Unhandled opcode!");
     339             :   case ARM::LDMIA_RET:
     340             :   case ARM::LDMIA:
     341             :   case ARM::LDMIA_UPD:
     342             :   case ARM::STMIA:
     343             :   case ARM::STMIA_UPD:
     344             :   case ARM::tLDMIA:
     345             :   case ARM::tLDMIA_UPD:
     346             :   case ARM::tSTMIA_UPD:
     347             :   case ARM::t2LDMIA_RET:
     348             :   case ARM::t2LDMIA:
     349             :   case ARM::t2LDMIA_UPD:
     350             :   case ARM::t2STMIA:
     351             :   case ARM::t2STMIA_UPD:
     352             :   case ARM::VLDMSIA:
     353             :   case ARM::VLDMSIA_UPD:
     354             :   case ARM::VSTMSIA:
     355             :   case ARM::VSTMSIA_UPD:
     356             :   case ARM::VLDMDIA:
     357             :   case ARM::VLDMDIA_UPD:
     358             :   case ARM::VSTMDIA:
     359             :   case ARM::VSTMDIA_UPD:
     360             :     return ARM_AM::ia;
     361             : 
     362           0 :   case ARM::LDMDA:
     363             :   case ARM::LDMDA_UPD:
     364             :   case ARM::STMDA:
     365             :   case ARM::STMDA_UPD:
     366           0 :     return ARM_AM::da;
     367             : 
     368           0 :   case ARM::LDMDB:
     369             :   case ARM::LDMDB_UPD:
     370             :   case ARM::STMDB:
     371             :   case ARM::STMDB_UPD:
     372             :   case ARM::t2LDMDB:
     373             :   case ARM::t2LDMDB_UPD:
     374             :   case ARM::t2STMDB:
     375             :   case ARM::t2STMDB_UPD:
     376             :   case ARM::VLDMSDB_UPD:
     377             :   case ARM::VSTMSDB_UPD:
     378             :   case ARM::VLDMDDB_UPD:
     379             :   case ARM::VSTMDDB_UPD:
     380           0 :     return ARM_AM::db;
     381             : 
     382          51 :   case ARM::LDMIB:
     383             :   case ARM::LDMIB_UPD:
     384             :   case ARM::STMIB:
     385             :   case ARM::STMIB_UPD:
     386          51 :     return ARM_AM::ib;
     387             :   }
     388             : }
     389             : 
     390             : static bool isT1i32Load(unsigned Opc) {
     391       23098 :   return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
     392             : }
     393             : 
     394             : static bool isT2i32Load(unsigned Opc) {
     395       21026 :   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
     396             : }
     397             : 
     398             : static bool isi32Load(unsigned Opc) {
     399       74173 :   return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
     400             : }
     401             : 
     402             : static bool isT1i32Store(unsigned Opc) {
     403        6291 :   return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
     404             : }
     405             : 
     406             : static bool isT2i32Store(unsigned Opc) {
     407        5845 :   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
     408             : }
     409             : 
     410             : static bool isi32Store(unsigned Opc) {
     411       22809 :   return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
     412             : }
     413             : 
     414             : static bool isLoadSingle(unsigned Opc) {
     415        6418 :   return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
     416             : }
     417             : 
     418             : static unsigned getImmScale(unsigned Opc) {
     419             :   switch (Opc) {
     420           0 :   default: llvm_unreachable("Unhandled opcode!");
     421             :   case ARM::tLDRi:
     422             :   case ARM::tSTRi:
     423             :   case ARM::tLDRspi:
     424             :   case ARM::tSTRspi:
     425             :     return 1;
     426             :   case ARM::tLDRHi:
     427             :   case ARM::tSTRHi:
     428             :     return 2;
     429             :   case ARM::tLDRBi:
     430             :   case ARM::tSTRBi:
     431             :     return 4;
     432             :   }
     433             : }
     434             : 
     435       14606 : static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
     436       29212 :   switch (MI->getOpcode()) {
     437             :   default: return 0;
     438       10744 :   case ARM::LDRi12:
     439             :   case ARM::STRi12:
     440             :   case ARM::tLDRi:
     441             :   case ARM::tSTRi:
     442             :   case ARM::tLDRspi:
     443             :   case ARM::tSTRspi:
     444             :   case ARM::t2LDRi8:
     445             :   case ARM::t2LDRi12:
     446             :   case ARM::t2STRi8:
     447             :   case ARM::t2STRi12:
     448             :   case ARM::VLDRS:
     449             :   case ARM::VSTRS:
     450       10744 :     return 4;
     451        3604 :   case ARM::VLDRD:
     452             :   case ARM::VSTRD:
     453        3604 :     return 8;
     454         194 :   case ARM::LDMIA:
     455             :   case ARM::LDMDA:
     456             :   case ARM::LDMDB:
     457             :   case ARM::LDMIB:
     458             :   case ARM::STMIA:
     459             :   case ARM::STMDA:
     460             :   case ARM::STMDB:
     461             :   case ARM::STMIB:
     462             :   case ARM::tLDMIA:
     463             :   case ARM::tLDMIA_UPD:
     464             :   case ARM::tSTMIA_UPD:
     465             :   case ARM::t2LDMIA:
     466             :   case ARM::t2LDMDB:
     467             :   case ARM::t2STMIA:
     468             :   case ARM::t2STMDB:
     469             :   case ARM::VLDMSIA:
     470             :   case ARM::VSTMSIA:
     471         388 :     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
     472          64 :   case ARM::VLDMDIA:
     473             :   case ARM::VSTMDIA:
     474         128 :     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
     475             :   }
     476             : }
     477             : 
     478             : /// Update future uses of the base register with the offset introduced
     479             : /// due to writeback. This function only works on Thumb1.
     480           8 : void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
     481             :                                         MachineBasicBlock::iterator MBBI,
     482             :                                         const DebugLoc &DL, unsigned Base,
     483             :                                         unsigned WordOffset,
     484             :                                         ARMCC::CondCodes Pred,
     485             :                                         unsigned PredReg) {
     486             :   assert(isThumb1 && "Can only update base register uses for Thumb1!");
     487             :   // Start updating any instructions with immediate offsets. Insert a SUB before
     488             :   // the first non-updateable instruction (if any).
     489          23 :   for (; MBBI != MBB.end(); ++MBBI) {
     490             :     bool InsertSub = false;
     491          23 :     unsigned Opc = MBBI->getOpcode();
     492             : 
     493          23 :     if (MBBI->readsRegister(Base)) {
     494             :       int Offset;
     495             :       bool IsLoad =
     496          12 :         Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
     497             :       bool IsStore =
     498          12 :         Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
     499             : 
     500          12 :       if (IsLoad || IsStore) {
     501             :         // Loads and stores with immediate offsets can be updated, but only if
     502             :         // the new offset isn't negative.
     503             :         // The MachineOperand containing the offset immediate is the last one
     504             :         // before predicates.
     505             :         MachineOperand &MO =
     506          22 :           MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
     507             :         // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
     508          22 :         Offset = MO.getImm() - WordOffset * getImmScale(Opc);
     509             : 
     510             :         // If storing the base register, it needs to be reset first.
     511          11 :         unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
     512             : 
     513          11 :         if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
     514          11 :           MO.setImm(Offset);
     515             :         else
     516             :           InsertSub = true;
     517           1 :       } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
     518           0 :                  !definesCPSR(*MBBI)) {
     519             :         // SUBS/ADDS using this register, with a dead def of the CPSR.
     520             :         // Merge it with the update; if the merged offset is too large,
     521             :         // insert a new sub instead.
     522             :         MachineOperand &MO =
     523           0 :           MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
     524           0 :         Offset = (Opc == ARM::tSUBi8) ?
     525           0 :           MO.getImm() + WordOffset * 4 :
     526           0 :           MO.getImm() - WordOffset * 4 ;
     527           0 :         if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
     528             :           // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
     529             :           // Offset == 0.
     530             :           MO.setImm(Offset);
     531             :           // The base register has now been reset, so exit early.
     532             :           return;
     533             :         } else {
     534             :           InsertSub = true;
     535             :         }
     536             :       } else {
     537             :         // Can't update the instruction.
     538             :         InsertSub = true;
     539             :       }
     540          30 :     } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
     541             :       // Since SUBS sets the condition flags, we can't place the base reset
     542             :       // after an instruction that has a live CPSR def.
     543             :       // The base register might also contain an argument for a function call.
     544             :       InsertSub = true;
     545             :     }
     546             : 
     547             :     if (InsertSub) {
     548             :       // An instruction above couldn't be updated, so insert a sub.
     549          12 :       BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
     550           4 :           .add(t1CondCodeOp(true))
     551           4 :           .addReg(Base)
     552           4 :           .addImm(WordOffset * 4)
     553           4 :           .addImm(Pred)
     554           4 :           .addReg(PredReg);
     555             :       return;
     556             :     }
     557             : 
     558          34 :     if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
     559             :       // Register got killed. Stop updating.
     560             :       return;
     561             :   }
     562             : 
     563             :   // End of block was reached.
     564           0 :   if (MBB.succ_size() > 0) {
     565             :     // FIXME: Because of a bug, live registers are sometimes missing from
     566             :     // the successor blocks' live-in sets. This means we can't trust that
     567             :     // information and *always* have to reset at the end of a block.
     568             :     // See PR21029.
     569             :     if (MBBI != MBB.end()) --MBBI;
     570           0 :     BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
     571           0 :         .add(t1CondCodeOp(true))
     572           0 :         .addReg(Base)
     573           0 :         .addImm(WordOffset * 4)
     574           0 :         .addImm(Pred)
     575           0 :         .addReg(PredReg);
     576             :   }
     577             : }
     578             : 
     579             : /// Return the first register of class \p RegClass not in LiveRegs (0 if none).
     580          62 : unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
     581          62 :   if (!RegClassInfoValid) {
     582          60 :     RegClassInfo.runOnMachineFunction(*MF);
     583          60 :     RegClassInfoValid = true;
     584             :   }
     585             : 
     586         628 :   for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
     587         307 :     if (!LiveRegs.contains(Reg))
     588             :       return Reg;
     589             :   return 0;
     590             : }
     591             : 
     592             : /// Compute live registers just before instruction \p Before (in normal schedule
     593             : /// direction). Computes backwards so multiple queries in the same block must
     594             : /// come in reverse order.
     595          62 : void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
     596             :     MachineBasicBlock::const_iterator Before) {
     597             :   // Initialize if we never queried in this block.
     598          62 :   if (!LiveRegsValid) {
     599          60 :     LiveRegs.init(*TRI);
     600          60 :     LiveRegs.addLiveOuts(MBB);
     601          60 :     LiveRegPos = MBB.end();
     602          60 :     LiveRegsValid = true;
     603             :   }
     604             :   // Move backward just before the "Before" position.
     605        1672 :   while (LiveRegPos != Before) {
     606             :     --LiveRegPos;
     607        1610 :     LiveRegs.stepBackward(*LiveRegPos);
     608             :   }
     609          62 : }
     610             : 
     611             : static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
     612             :                         unsigned Reg) {
     613        1481 :   for (const std::pair<unsigned, bool> &R : Regs)
     614         656 :     if (R.first == Reg)
     615             :       return true;
     616             :   return false;
     617             : }
     618             : 
     619             : /// Create and insert a LDM or STM with Base as base register and registers in
     620             : /// Regs as the register operands that would be loaded / stored.  It returns
     621             : /// the new instruction if the transformation is done, nullptr otherwise.
     622         827 : MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
     623             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     624             :     int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
     625             :     ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
     626             :     ArrayRef<std::pair<unsigned, bool>> Regs) {
     627         827 :   unsigned NumRegs = Regs.size();
     628             :   assert(NumRegs > 1);
     629             : 
     630             :   // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
     631             :   // Compute liveness information for that register to make the decision.
     632        1009 :   bool SafeToClobberCPSR = !isThumb1 ||
     633        1009 :     (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     634             :      MachineBasicBlock::LQR_Dead);
     635             : 
     636         827 :   bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
     637             : 
     638             :   // Exception: If the base register is in the input reglist, Thumb1 LDM is
     639             :   // non-writeback.
     640             :   // It's also not possible to merge an STR of the base register in Thumb1.
     641        1009 :   if (isThumb1 && ContainsReg(Regs, Base)) {
     642             :     assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
     643           6 :     if (Opcode == ARM::tLDRi)
     644             :       Writeback = false;
     645           2 :     else if (Opcode == ARM::tSTRi)
     646             :       return nullptr;
     647             :   }
     648             : 
     649             :   ARM_AM::AMSubMode Mode = ARM_AM::ia;
     650             :   // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
     651             :   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
     652         742 :   bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
     653             : 
     654         825 :   if (Offset == 4 && haveIBAndDA) {
     655             :     Mode = ARM_AM::ib;
     656         772 :   } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
     657             :     Mode = ARM_AM::da;
     658         772 :   } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
     659             :     // VLDM/VSTM do not support DB mode without also updating the base reg.
     660             :     Mode = ARM_AM::db;
     661         772 :   } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
     662             :     // Check if this is a supported opcode before inserting instructions to
     663             :     // calculate a new base register.
     664         588 :     if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
     665             : 
     666             :     // If starting offset isn't zero, insert a MI to materialize a new base.
     667             :     // But only do so if it is cost effective, i.e. merging more than two
     668             :     // loads / stores.
     669         588 :     if (NumRegs <= 2)
     670             :       return nullptr;
     671             : 
     672             :     // On Thumb1, it's not worth materializing a new base register without
     673             :     // clobbering the CPSR (i.e. not using ADDS/SUBS).
     674         438 :     if (!SafeToClobberCPSR)
     675             :       return nullptr;
     676             : 
     677             :     unsigned NewBase;
     678             :     if (isi32Load(Opcode)) {
     679             :       // If it is a load, then just use one of the destination registers
     680             :       // as the new base. Will no longer be writeback in Thumb1.
     681         746 :       NewBase = Regs[NumRegs-1].first;
     682             :       Writeback = false;
     683             :     } else {
     684             :       // Find a free register that we can use as scratch register.
     685          62 :       moveLiveRegsBefore(MBB, InsertBefore);
     686             :       // The merged instruction does not exist yet but will use several Regs if
     687             :       // it is a Store.
     688             :       if (!isLoadSingle(Opcode))
     689         474 :         for (const std::pair<unsigned, bool> &R : Regs)
     690         209 :           LiveRegs.addReg(R.first);
     691             : 
     692          62 :       NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
     693          62 :       if (NewBase == 0)
     694             :         return nullptr;
     695             :     }
     696             : 
     697             :     int BaseOpc =
     698        1164 :       isThumb2 ? ARM::t2ADDri :
     699         482 :       (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
     700           9 :       (isThumb1 && Offset < 8) ? ARM::tADDi3 :
     701             :       isThumb1 ? ARM::tADDi8  : ARM::ADDri;
     702             : 
     703         428 :     if (Offset < 0) {
     704           0 :       Offset = - Offset;
     705             :       BaseOpc =
     706           0 :         isThumb2 ? ARM::t2SUBri :
     707           0 :         (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
     708             :         isThumb1 ? ARM::tSUBi8  : ARM::SUBri;
     709             :     }
     710             : 
     711         428 :     if (!TL->isLegalAddImmediate(Offset))
     712             :       // FIXME: Try add with register operand?
     713             :       return nullptr; // Probably not worth it then.
     714             : 
     715             :     // We can only append a kill flag to the add/sub input if the value is not
     716             :     // used in the register list of the stm as well.
     717         365 :     bool KillOldBase = BaseKill &&
     718           3 :       (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
     719             : 
     720         365 :     if (isThumb1) {
     721             :       // Thumb1: depending on immediate size, use either
     722             :       //   ADDS NewBase, Base, #imm3
     723             :       // or
     724             :       //   MOV  NewBase, Base
     725             :       //   ADDS NewBase, #imm8.
     726         167 :       if (Base != NewBase &&
     727          82 :           (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
     728             :         // Need to insert a MOV to the new base first.
     729           4 :         if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
     730           4 :             !STI->hasV6Ops()) {
     731             :           // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
     732           2 :           if (Pred != ARMCC::AL)
     733             :             return nullptr;
     734           6 :           BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
     735           2 :             .addReg(Base, getKillRegState(KillOldBase));
     736             :         } else
     737           6 :           BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
     738           2 :               .addReg(Base, getKillRegState(KillOldBase))
     739           2 :               .add(predOps(Pred, PredReg));
     740             : 
     741             :         // The following ADDS/SUBS becomes an update.
     742             :         Base = NewBase;
     743             :         KillOldBase = true;
     744             :       }
     745          85 :       if (BaseOpc == ARM::tADDrSPi) {
     746             :         assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
     747         228 :         BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
     748          76 :             .addReg(Base, getKillRegState(KillOldBase))
     749          76 :             .addImm(Offset / 4)
     750          76 :             .add(predOps(Pred, PredReg));
     751             :       } else
     752          27 :         BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
     753           9 :             .add(t1CondCodeOp(true))
     754           9 :             .addReg(Base, getKillRegState(KillOldBase))
     755             :             .addImm(Offset)
     756           9 :             .add(predOps(Pred, PredReg));
     757             :     } else {
     758         840 :       BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
     759         280 :           .addReg(Base, getKillRegState(KillOldBase))
     760             :           .addImm(Offset)
     761         280 :           .add(predOps(Pred, PredReg))
     762         280 :           .add(condCodeOp());
     763             :     }
     764             :     Base = NewBase;
     765             :     BaseKill = true; // New base is always killed straight away.
     766             :   }
     767             : 
     768             :   bool isDef = isLoadSingle(Opcode);
     769             : 
     770             :   // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
     771             :   // base register writeback.
     772         602 :   Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
     773         602 :   if (!Opcode)
     774             :     return nullptr;
     775             : 
     776             :   // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
     777             :   // - There is no writeback (LDM of base register),
     778             :   // - the base register is killed by the merged instruction,
     779             :   // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
     780             :   //   to reset the base register.
     781             :   // Otherwise, don't merge.
     782             :   // It's safe to return here since the code to materialize a new base register
     783             :   // above is also conditional on SafeToClobberCPSR.
     784         602 :   if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
     785             :     return nullptr;
     786             : 
     787         602 :   MachineInstrBuilder MIB;
     788             : 
     789         602 :   if (Writeback) {
     790             :     assert(isThumb1 && "expected Writeback only inThumb1");
     791          28 :     if (Opcode == ARM::tLDMIA) {
     792             :       assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
     793             :       // Update tLDMIA with writeback if necessary.
     794             :       Opcode = ARM::tLDMIA_UPD;
     795             :     }
     796             : 
     797          56 :     MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
     798             : 
     799             :     // Thumb1: we might need to set base writeback when building the MI.
     800          28 :     MIB.addReg(Base, getDefRegState(true))
     801          28 :        .addReg(Base, getKillRegState(BaseKill));
     802             : 
     803             :     // The base isn't dead after a merged instruction with writeback.
     804             :     // Insert a sub instruction after the newly formed instruction to reset.
     805          28 :     if (!BaseKill)
     806           8 :       UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
     807             :   } else {
     808             :     // No writeback, simply build the MachineInstr.
     809        1148 :     MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
     810         574 :     MIB.addReg(Base, getKillRegState(BaseKill));
     811             :   }
     812             : 
     813        1204 :   MIB.addImm(Pred).addReg(PredReg);
     814             : 
     815        5040 :   for (const std::pair<unsigned, bool> &R : Regs)
     816        4438 :     MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
     817             : 
     818         602 :   return MIB.getInstr();
     819             : }
     820             : 
     821         138 : MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
     822             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     823             :     int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
     824             :     ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
     825             :     ArrayRef<std::pair<unsigned, bool>> Regs) const {
     826             :   bool IsLoad = isi32Load(Opcode);
     827             :   assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
     828             :   unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
     829             : 
     830             :   assert(Regs.size() == 2);
     831             :   MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
     832         276 :                                     TII->get(LoadStoreOpcode));
     833         138 :   if (IsLoad) {
     834          51 :     MIB.addReg(Regs[0].first, RegState::Define)
     835          51 :        .addReg(Regs[1].first, RegState::Define);
     836             :   } else {
     837         174 :     MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
     838         174 :        .addReg(Regs[1].first, getKillRegState(Regs[1].second));
     839             :   }
     840         414 :   MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
     841         138 :   return MIB.getInstr();
     842             : }
     843             : 
     844             : /// Merge \p Cand into a load/store double or multiple and fix up the block on success; return the merged instruction or nullptr.
     845         965 : MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
     846         965 :   const MachineInstr *First = Cand.Instrs.front();
     847         965 :   unsigned Opcode = First->getOpcode();
     848             :   bool IsLoad = isLoadSingle(Opcode);
     849             :   SmallVector<std::pair<unsigned, bool>, 8> Regs;
     850             :   SmallVector<unsigned, 4> ImpDefs;
     851             :   DenseSet<unsigned> KilledRegs;
     852             :   DenseSet<unsigned> UsedRegs;
     853             :   // Determine list of registers and list of implicit super-register defs.
     854        7127 :   for (const MachineInstr *MI : Cand.Instrs) {
     855             :     const MachineOperand &MO = getLoadStoreRegOp(*MI);
     856        3081 :     unsigned Reg = MO.getReg();
     857             :     bool IsKill = MO.isKill();
     858        3081 :     if (IsKill)
     859             :       KilledRegs.insert(Reg);
     860        6162 :     Regs.push_back(std::make_pair(Reg, IsKill));
     861             :     UsedRegs.insert(Reg);
     862             : 
     863        3081 :     if (IsLoad) {
     864             :       // Collect any implicit defs of super-registers, after merging we can't
     865             :       // be sure anymore that we properly preserved these live ranges and must
     866             :       // remove these implicit operands.
     867        2367 :       for (const MachineOperand &MO : MI->implicit_operands()) {
     868         289 :         if (!MO.isReg() || !MO.isDef() || MO.isDead())
     869         123 :           continue;
     870             :         assert(MO.isImplicit());
     871          69 :         unsigned DefReg = MO.getReg();
     872             : 
     873          69 :         if (is_contained(ImpDefs, DefReg))
     874          13 :           continue;
     875             :         // We can ignore cases where the super-reg is read and written.
     876          56 :         if (MI->readsRegister(DefReg))
     877          28 :           continue;
     878          28 :         ImpDefs.push_back(DefReg);
     879             :       }
     880             :     }
     881             :   }
     882             : 
     883             :   // Attempt the merge.
     884             :   using iterator = MachineBasicBlock::iterator;
     885             : 
     886        1930 :   MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
     887         965 :   iterator InsertBefore = std::next(iterator(LatestMI));
     888         965 :   MachineBasicBlock &MBB = *LatestMI->getParent();
     889         965 :   unsigned Offset = getMemoryOpOffset(*First);
     890         965 :   unsigned Base = getLoadStoreBaseOp(*First).getReg();
     891             :   bool BaseKill = LatestMI->killsRegister(Base);
     892         965 :   unsigned PredReg = 0;
     893         965 :   ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
     894             :   DebugLoc DL = First->getDebugLoc();
     895             :   MachineInstr *Merged = nullptr;
     896         965 :   if (Cand.CanMergeToLSDouble)
     897         138 :     Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
     898             :                                    Opcode, Pred, PredReg, DL, Regs);
     899         138 :   if (!Merged && Cand.CanMergeToLSMulti)
     900         827 :     Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
     901             :                                   Opcode, Pred, PredReg, DL, Regs);
     902         965 :   if (!Merged)
     903             :     return nullptr;
     904             : 
     905             :   // Determine earliest instruction that will get removed. We then keep an
     906             :   // iterator just above it so the following erases don't invalidate it.
     907        1480 :   iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
     908             :   bool EarliestAtBegin = false;
     909         740 :   if (EarliestI == MBB.begin()) {
     910             :     EarliestAtBegin = true;
     911             :   } else {
     912             :     EarliestI = std::prev(EarliestI);
     913             :   }
     914             : 
     915             :   // Remove instructions which have been merged.
     916        5730 :   for (MachineInstr *MI : Cand.Instrs)
     917             :     MBB.erase(MI);
     918             : 
     919             :   // Determine range between the earliest removed instruction and the new one.
     920         740 :   if (EarliestAtBegin)
     921             :     EarliestI = MBB.begin();
     922             :   else
     923             :     EarliestI = std::next(EarliestI);
     924             :   auto FixupRange = make_range(EarliestI, iterator(Merged));
     925             : 
     926             :   if (isLoadSingle(Opcode)) {
     927             :     // If the previous loads defined a super-reg, then we have to mark earlier
     928             :     // operands undef; Replicate the super-reg def on the merged instruction.
     929        1296 :     for (MachineInstr &MI : FixupRange) {
     930         340 :       for (unsigned &ImpDefReg : ImpDefs) {
     931          13 :         for (MachineOperand &MO : MI.implicit_operands()) {
     932           2 :           if (!MO.isReg() || MO.getReg() != ImpDefReg)
     933           0 :             continue;
     934             :           if (MO.readsReg())
     935             :             MO.setIsUndef();
     936           1 :           else if (MO.isDef())
     937           1 :             ImpDefReg = 0;
     938             :         }
     939             :       }
     940             :     }
     941             : 
     942         487 :     MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
     943         541 :     for (unsigned ImpDef : ImpDefs)
     944          27 :       MIB.addReg(ImpDef, RegState::ImplicitDefine);
     945             :   } else {
     946             :     // Remove kill flags: We are possibly storing the values later now.
     947             :     assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
     948         565 :     for (MachineInstr &MI : FixupRange) {
     949         615 :       for (MachineOperand &MO : MI.uses()) {
     950         716 :         if (!MO.isReg() || !MO.isKill())
     951         276 :           continue;
     952           4 :         if (UsedRegs.count(MO.getReg()))
     953             :           MO.setIsKill(false);
     954             :       }
     955             :     }
     956             :     assert(ImpDefs.empty());
     957             :   }
     958             : 
     959             :   return Merged;
     960             : }
     961             : 
     962             : static bool isValidLSDoubleOffset(int Offset) {
     963        1932 :   unsigned Value = abs(Offset);
     964             :   // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
     965             :   // multiplied by 4.
     966        1932 :   return (Value % 4) == 0 && Value < 1024;
     967             : }
     968             : 
     969             : /// Return true for loads/stores that can be combined to a double/multi
     970             : /// operation without increasing the requirements for alignment.
     971          46 : static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
     972             :                                  const MachineInstr &MI) {
     973             :   // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
     974             :   // difference.
     975          46 :   unsigned Opcode = MI.getOpcode();
     976             :   if (!isi32Load(Opcode) && !isi32Store(Opcode))
     977             :     return true;
     978             : 
     979             :   // Stack pointer alignment is out of the programmers control so we can trust
     980             :   // SP-relative loads/stores.
     981          47 :   if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
     982           4 :       STI.getFrameLowering()->getTransientStackAlignment() >= 4)
     983             :     return true;
     984             :   return false;
     985             : }
     986             : 
     987             : /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
     988        8972 : void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
     989        8972 :   const MachineInstr *FirstMI = MemOps[0].MI;
     990        8972 :   unsigned Opcode = FirstMI->getOpcode();
     991             :   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
     992        8972 :   unsigned Size = getLSMultipleTransferSize(FirstMI);
     993             : 
     994             :   unsigned SIndex = 0;
     995        8972 :   unsigned EIndex = MemOps.size();
     996             :   do {
     997             :     // Look at the first instruction.
     998       19568 :     const MachineInstr *MI = MemOps[SIndex].MI;
     999        9784 :     int Offset = MemOps[SIndex].Offset;
    1000             :     const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    1001        9784 :     unsigned PReg = PMO.getReg();
    1002       19568 :     unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
    1003        9784 :                                      : TRI->getEncodingValue(PReg);
    1004             :     unsigned Latest = SIndex;
    1005             :     unsigned Earliest = SIndex;
    1006             :     unsigned Count = 1;
    1007             :     bool CanMergeToLSDouble =
    1008       11999 :       STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    1009             :     // ARM errata 602117: LDRD with base in list may result in incorrect base
    1010             :     // register when interrupted or faulted.
    1011        9821 :     if (STI->isCortexM3() && isi32Load(Opcode) &&
    1012          37 :         PReg == getLoadStoreBaseOp(*MI).getReg())
    1013             :       CanMergeToLSDouble = false;
    1014             : 
    1015             :     bool CanMergeToLSMulti = true;
    1016             :     // On Swift, avoid vldm/vstm starting with an odd register number as that
    1017             :     // needs more uops than single vldrs.
    1018        9784 :     if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
    1019             :       CanMergeToLSMulti = false;
    1020             : 
    1021             :     // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    1022             :     // deprecated; LDM to PC is fine but cannot happen here.
    1023        9784 :     if (PReg == ARM::SP || PReg == ARM::PC)
    1024             :       CanMergeToLSMulti = CanMergeToLSDouble = false;
    1025             : 
    1026             :     // Should we be conservative?
    1027        9784 :     if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
    1028             :       CanMergeToLSMulti = CanMergeToLSDouble = false;
    1029             : 
    1030             :     // Merge following instructions where possible.
    1031       11900 :     for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
    1032        5856 :       int NewOffset = MemOps[I].Offset;
    1033        2928 :       if (NewOffset != Offset + (int)Size)
    1034             :         break;
    1035        2802 :       const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
    1036        2802 :       unsigned Reg = MO.getReg();
    1037        2802 :       if (Reg == ARM::SP || Reg == ARM::PC)
    1038             :         break;
    1039             : 
    1040             :       // See if the current load/store may be part of a multi load/store.
    1041        5604 :       unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
    1042        2802 :                                      : TRI->getEncodingValue(Reg);
    1043             :       bool PartOfLSMulti = CanMergeToLSMulti;
    1044        2802 :       if (PartOfLSMulti) {
    1045             :         // Register numbers must be in ascending order.
    1046        2749 :         if (RegNum <= PRegNum)
    1047             :           PartOfLSMulti = false;
    1048             :         // For VFP / NEON load/store multiples, the registers must be
    1049             :         // consecutive and within the limit on the number of registers per
    1050             :         // instruction.
    1051        2088 :         else if (!isNotVFP && RegNum != PRegNum+1)
    1052             :           PartOfLSMulti = false;
    1053             :       }
    1054             :       // See if the current load/store may be part of a double load/store.
    1055        2802 :       bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
    1056             : 
    1057        2802 :       if (!PartOfLSMulti && !PartOfLSDouble)
    1058             :         break;
    1059             :       CanMergeToLSMulti &= PartOfLSMulti;
    1060             :       CanMergeToLSDouble &= PartOfLSDouble;
    1061             :       // Track MemOp with latest and earliest position (Positions are
    1062             :       // counted in reverse).
    1063        2116 :       unsigned Position = MemOps[I].Position;
    1064        4232 :       if (Position < MemOps[Latest].Position)
    1065             :         Latest = I;
    1066        1304 :       else if (Position > MemOps[Earliest].Position)
    1067             :         Earliest = I;
    1068             :       // Prepare for next MemOp.
    1069        2116 :       Offset += Size;
    1070             :       PRegNum = RegNum;
    1071             :     }
    1072             : 
    1073             :     // Form a candidate from the Ops collected so far.
    1074             :     MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    1075       21684 :     for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
    1076       23800 :       Candidate->Instrs.push_back(MemOps[C].MI);
    1077        9784 :     Candidate->LatestMIIdx = Latest - SIndex;
    1078        9784 :     Candidate->EarliestMIIdx = Earliest - SIndex;
    1079       19568 :     Candidate->InsertPos = MemOps[Latest].Position;
    1080        9784 :     if (Count == 1)
    1081             :       CanMergeToLSMulti = CanMergeToLSDouble = false;
    1082        9784 :     Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    1083        9784 :     Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    1084        9784 :     Candidates.push_back(Candidate);
    1085             :     // Continue after the chain.
    1086             :     SIndex += Count;
    1087        9784 :   } while (SIndex < EIndex);
    1088        8972 : }
    1089             : /// Maps load/store-multiple opcode \p Opc and submode \p Mode to the matching base-updating (_UPD) opcode; unlisted opcodes or submodes are llvm_unreachable.
    1090          12 : static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
    1091             :                                             ARM_AM::AMSubMode Mode) {
    1092          12 :   switch (Opc) {
    1093           0 :   default: llvm_unreachable("Unhandled opcode!");
    1094           2 :   case ARM::LDMIA: // Whichever LDM variant Opc is, only Mode selects the result.
    1095             :   case ARM::LDMDA:
    1096             :   case ARM::LDMDB:
    1097             :   case ARM::LDMIB:
    1098           2 :     switch (Mode) {
    1099           0 :     default: llvm_unreachable("Unhandled submode!");
    1100             :     case ARM_AM::ia: return ARM::LDMIA_UPD;
    1101           0 :     case ARM_AM::ib: return ARM::LDMIB_UPD;
    1102           0 :     case ARM_AM::da: return ARM::LDMDA_UPD;
    1103           0 :     case ARM_AM::db: return ARM::LDMDB_UPD;
    1104             :     }
    1105           1 :   case ARM::STMIA:
    1106             :   case ARM::STMDA:
    1107             :   case ARM::STMDB:
    1108             :   case ARM::STMIB:
    1109           1 :     switch (Mode) {
    1110           0 :     default: llvm_unreachable("Unhandled submode!");
    1111             :     case ARM_AM::ia: return ARM::STMIA_UPD;
    1112           0 :     case ARM_AM::ib: return ARM::STMIB_UPD;
    1113           0 :     case ARM_AM::da: return ARM::STMDA_UPD;
    1114           0 :     case ARM_AM::db: return ARM::STMDB_UPD;
    1115             :     }
    1116           2 :   case ARM::t2LDMIA: // From here on (t2 and VFP opcodes) only ia and db are handled.
    1117             :   case ARM::t2LDMDB:
    1118           2 :     switch (Mode) {
    1119           0 :     default: llvm_unreachable("Unhandled submode!");
    1120             :     case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    1121           0 :     case ARM_AM::db: return ARM::t2LDMDB_UPD;
    1122             :     }
    1123           4 :   case ARM::t2STMIA:
    1124             :   case ARM::t2STMDB:
    1125           4 :     switch (Mode) {
    1126           0 :     default: llvm_unreachable("Unhandled submode!");
    1127             :     case ARM_AM::ia: return ARM::t2STMIA_UPD;
    1128           0 :     case ARM_AM::db: return ARM::t2STMDB_UPD;
    1129             :     }
    1130           0 :   case ARM::VLDMSIA:
    1131           0 :     switch (Mode) {
    1132           0 :     default: llvm_unreachable("Unhandled submode!");
    1133             :     case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    1134           0 :     case ARM_AM::db: return ARM::VLDMSDB_UPD;
    1135             :     }
    1136           0 :   case ARM::VLDMDIA:
    1137           0 :     switch (Mode) {
    1138           0 :     default: llvm_unreachable("Unhandled submode!");
    1139             :     case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    1140           0 :     case ARM_AM::db: return ARM::VLDMDDB_UPD;
    1141             :     }
    1142           0 :   case ARM::VSTMSIA:
    1143           0 :     switch (Mode) {
    1144           0 :     default: llvm_unreachable("Unhandled submode!");
    1145             :     case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    1146           0 :     case ARM_AM::db: return ARM::VSTMSDB_UPD;
    1147             :     }
    1148           3 :   case ARM::VSTMDIA:
    1149           3 :     switch (Mode) {
    1150           0 :     default: llvm_unreachable("Unhandled submode!");
    1151             :     case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    1152           0 :     case ARM_AM::db: return ARM::VSTMDDB_UPD;
    1153             :     }
    1154             :   }
    1155             : }
    1156             : 
    1157             : /// Check if \p MI adds an immediate to, or subtracts one from, \p Reg in place
    1158             : /// under predicate \p Pred / \p PredReg, and return the signed (scaled) amount.
    1159             : /// Returns 0 if there is no match or if a CPSR-checked form defines CPSR.
    1160        5887 : static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
    1161             :                                   ARMCC::CondCodes Pred, unsigned PredReg) {
    1162             :   bool CheckCPSRDef;
    1163             :   int Scale;
    1164       11774 :   switch (MI.getOpcode()) {
    1165             :   case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break; // NOTE(review): scaled by 4 like the SP forms - confirm.
    1166           0 :   case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
    1167          38 :   case ARM::t2SUBri:
    1168          38 :   case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
    1169         270 :   case ARM::t2ADDri:
    1170         270 :   case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
    1171          11 :   case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
    1172          14 :   case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
    1173             :   default: return 0;
    1174             :   }
    1175             :   // Operands 0 and 1 must both be Reg, and the predicate must match Pred/PredReg.
    1176             :   unsigned MIPredReg;
    1177         497 :   if (MI.getOperand(0).getReg() != Reg ||
    1178         277 :       MI.getOperand(1).getReg() != Reg ||
    1179         539 :       getInstrPredicate(MI, MIPredReg) != Pred ||
    1180          93 :       MIPredReg != PredReg)
    1181             :     return 0;
    1182             :   // Reject instructions that define CPSR, except for the SP-relative forms.
    1183          93 :   if (CheckCPSRDef && definesCPSR(MI))
    1184             :     return 0;
    1185          93 :   return MI.getOperand(2).getImm() * Scale;
    1186             : }
    1187             : 
    1188             : /// Searches for an increment or decrement of \p Reg immediately before \p MBBI (skipping debug instructions); returns it with \p Offset set, or MBB.end() with \p Offset == 0.
    1189             : static MachineBasicBlock::iterator
    1190        3494 : findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
    1191             :                  ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
    1192        3494 :   Offset = 0;
    1193        3494 :   MachineBasicBlock &MBB = *MBBI->getParent();
    1194             :   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
    1195        3494 :   MachineBasicBlock::iterator EndMBBI = MBB.end();
    1196        3494 :   if (MBBI == BeginMBBI)
    1197        1051 :     return EndMBBI;
    1198             : 
    1199             :   // Skip debug instructions, stopping at the first instruction of the block.
    1200        2443 :   MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    1201           5 :   while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
    1202             :     --PrevMBBI;
    1203             : 
    1204        2443 :   Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
    1205        4886 :   return Offset == 0 ? EndMBBI : PrevMBBI;
    1206             : }
    1207             : 
    1208             : /// Searches for an increment or decrement of \p Reg immediately after \p MBBI (skipping debug instructions); returns it with \p Offset set, or MBB.end() with \p Offset == 0.
    1209             : static MachineBasicBlock::iterator
    1210        3483 : findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
    1211             :                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
    1212        3483 :   Offset = 0;
    1213        3483 :   MachineBasicBlock &MBB = *MBBI->getParent();
    1214        3483 :   MachineBasicBlock::iterator EndMBBI = MBB.end();
    1215        3483 :   MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    1216             :   // Skip debug instructions; reaching the end of the block means no match.
    1217        3487 :   while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
    1218             :     ++NextMBBI;
    1219        3483 :   if (NextMBBI == EndMBBI)
    1220          39 :     return EndMBBI;
    1221             : 
    1222        3444 :   Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
    1223        3444 :   return Offset == 0 ? EndMBBI : NextMBBI;
    1224             : }
    1225             : 
    1226             : /// Fold preceding/trailing inc/dec of base register into the
    1227             : /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible. Returns true if \p MI was replaced by a writeback form. For example:
    1228             : ///
    1229             : /// stmia rn, <ra, rb, rc>
    1230             : /// rn := rn + 4 * 3;
    1231             : /// =>
    1232             : /// stmia rn!, <ra, rb, rc>
    1233             : ///
    1234             : /// rn := rn - 4 * 3;
    1235             : /// ldmia rn, <ra, rb, rc>
    1236             : /// =>
    1237             : /// ldmdb rn!, <ra, rb, rc>
    1238         602 : bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
    1239             :   // Thumb1 is already using updating loads/stores.
    1240         602 :   if (isThumb1) return false;
    1241             : 
    1242         501 :   const MachineOperand &BaseOP = MI->getOperand(0);
    1243         501 :   unsigned Base = BaseOP.getReg();
    1244             :   bool BaseKill = BaseOP.isKill();
    1245         501 :   unsigned PredReg = 0;
    1246         501 :   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
    1247         501 :   unsigned Opcode = MI->getOpcode();
    1248             :   DebugLoc DL = MI->getDebugLoc();
    1249             : 
    1250             :   // Can't use an updating ld/st if the base register is also a dest
    1251             :   // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    1252        2638 :   for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    1253        4760 :     if (MI->getOperand(i).getReg() == Base)
    1254             :       return false;
    1255             :   // First try an inc/dec right before MI: a decrement by the transfer size turns ia into db and ib into da.
    1256         258 :   int Bytes = getLSMultipleTransferSize(MI);
    1257         258 :   MachineBasicBlock &MBB = *MI->getParent();
    1258             :   MachineBasicBlock::iterator MBBI(MI);
    1259             :   int Offset;
    1260             :   MachineBasicBlock::iterator MergeInstr
    1261         258 :     = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
    1262         258 :   ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
    1263         258 :   if (Mode == ARM_AM::ia && Offset == -Bytes) {
    1264             :     Mode = ARM_AM::db;
    1265         258 :   } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    1266             :     Mode = ARM_AM::da;
    1267             :   } else {
    1268         258 :     MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    1269         511 :     if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
    1270         253 :         ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
    1271             : 
    1272             :       // We couldn't find an inc/dec to merge. But if the base is dead, we
    1273             :       // can still change to a writeback form as that will save us 2 bytes
    1274             :       // of code size. It can create WAW hazards though, so only do it if
    1275             :       // we're minimizing code size.
    1276         506 :       if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
    1277             :         return false;
    1278             :       // Bail out if any operand from index 2 on is R8 or higher.
    1279             :       bool HighRegsUsed = false;
    1280          35 :       for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    1281          56 :         if (MI->getOperand(i).getReg() >= ARM::R8) {
    1282             :           HighRegsUsed = true;
    1283             :           break;
    1284             :         }
    1285             : 
    1286           7 :       if (!HighRegsUsed)
    1287             :         MergeInstr = MBB.end(); // No inc/dec instruction to erase below.
    1288             :       else
    1289             :         return false;
    1290             :     }
    1291             :   }
    1292          12 :   if (MergeInstr != MBB.end())
    1293           5 :     MBB.erase(MergeInstr);
    1294             : 
    1295          12 :   unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
    1296          36 :   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    1297          12 :     .addReg(Base, getDefRegState(true)) // WB base register
    1298          12 :     .addReg(Base, getKillRegState(BaseKill))
    1299          24 :     .addImm(Pred).addReg(PredReg);
    1300             : 
    1301             :   // Transfer the rest of operands.
    1302          46 :   for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    1303          34 :     MIB.add(MI->getOperand(OpNum));
    1304             : 
    1305             :   // Transfer memoperands.
    1306          12 :   MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
    1307             : 
    1308          12 :   MBB.erase(MBBI);
    1309             :   return true;
    1310             : }
    1311             : /// Returns the pre-indexed opcode for \p Opc, or for VLDR/VSTR the updating load/store-multiple opcode; \p Mode only matters for the VLDR/VSTR cases (add selects IA, otherwise DB).
    1312             : static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
    1313             :                                              ARM_AM::AddrOpc Mode) {
    1314           5 :   switch (Opc) {
    1315             :   case ARM::LDRi12:
    1316             :     return ARM::LDR_PRE_IMM;
    1317           1 :   case ARM::STRi12:
    1318             :     return ARM::STR_PRE_IMM;
    1319           0 :   case ARM::VLDRS:
    1320             :     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    1321           0 :   case ARM::VLDRD:
    1322             :     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    1323           0 :   case ARM::VSTRS:
    1324             :     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    1325           0 :   case ARM::VSTRD:
    1326             :     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    1327           0 :   case ARM::t2LDRi8:
    1328             :   case ARM::t2LDRi12:
    1329             :     return ARM::t2LDR_PRE;
    1330           4 :   case ARM::t2STRi8:
    1331             :   case ARM::t2STRi12:
    1332             :     return ARM::t2STR_PRE;
    1333           0 :   default: llvm_unreachable("Unhandled opcode!");
    1334             :   }
    1335             : }
    1336             : /// Returns the post-indexed opcode for \p Opc, or for VLDR/VSTR the updating load/store-multiple opcode; \p Mode only matters for the VLDR/VSTR cases (add selects IA, otherwise DB).
    1337             : static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
    1338             :                                               ARM_AM::AddrOpc Mode) {
    1339          27 :   switch (Opc) {
    1340             :   case ARM::LDRi12:
    1341             :     return ARM::LDR_POST_IMM;
    1342          13 :   case ARM::STRi12:
    1343             :     return ARM::STR_POST_IMM;
    1344           0 :   case ARM::VLDRS:
    1345             :     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    1346           2 :   case ARM::VLDRD:
    1347             :     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    1348           0 :   case ARM::VSTRS:
    1349             :     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    1350          10 :   case ARM::VSTRD:
    1351             :     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    1352           1 :   case ARM::t2LDRi8:
    1353             :   case ARM::t2LDRi12:
    1354             :     return ARM::t2LDR_POST;
    1355           0 :   case ARM::t2STRi8:
    1356             :   case ARM::t2STRi12:
    1357             :     return ARM::t2STR_POST;
    1358           0 :   default: llvm_unreachable("Unhandled opcode!");
    1359             :   }
    1360             : }
    1361             : 
    1362             : /// Fold preceding/trailing inc/dec of base register into the
    1363             : /// LDR/STR/VLDR{D|S}/VSTR{D|S} op when possible. Returns true if \p MI was replaced.
    1364        9405 : bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
    1365             :   // Thumb1 doesn't have updating LDR/STR.
    1366             :   // FIXME: Use LDM/STM with single register instead.
    1367        9405 :   if (isThumb1) return false;
    1368             : 
    1369        8047 :   unsigned Base = getLoadStoreBaseOp(*MI).getReg();
    1370             :   bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
    1371        8047 :   unsigned Opcode = MI->getOpcode();
    1372             :   DebugLoc DL = MI->getDebugLoc();
    1373        8047 :   bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
    1374        8047 :                 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
    1375        8047 :   bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
    1376             :   if (isi32Load(Opcode) || isi32Store(Opcode))
    1377        5898 :     if (MI->getOperand(2).getImm() != 0)
    1378             :       return false; // Only zero-offset accesses are candidates.
    1379        6288 :   if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    1380             :     return false;
    1381             : 
    1382             :   // Can't do the merge if the destination register is the same as the would-be
    1383             :   // writeback register.
    1384        3713 :   if (MI->getOperand(0).getReg() == Base)
    1385             :     return false;
    1386             : 
    1387        3090 :   unsigned PredReg = 0;
    1388        3090 :   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
    1389        3090 :   int Bytes = getLSMultipleTransferSize(MI);
    1390        3090 :   MachineBasicBlock &MBB = *MI->getParent();
    1391             :   MachineBasicBlock::iterator MBBI(MI);
    1392             :   int Offset;
    1393             :   MachineBasicBlock::iterator MergeInstr
    1394        3090 :     = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
    1395             :   unsigned NewOpc;
    1396        3090 :   if (!isAM5 && Offset == Bytes) { // AM5 ops never take the add-before form...
    1397             :     NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    1398        3090 :   } else if (Offset == -Bytes) {
    1399             :     NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    1400             :   } else {
    1401        3085 :     MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    1402        3085 :     if (Offset == Bytes) {
    1403             :       NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    1404        3058 :     } else if (!isAM5 && Offset == -Bytes) { // ...nor the subtract-after form.
    1405             :       NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    1406             :     } else
    1407             :       return false;
    1408             :   }
    1409          32 :   MBB.erase(MergeInstr);
    1410             : 
    1411          32 :   ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
    1412             : 
    1413             :   bool isLd = isLoadSingle(Opcode);
    1414          32 :   if (isAM5) {
    1415             :     // VLDM[SD]_UPD, VSTM[SD]_UPD
    1416             :     // (There are no base-updating versions of VLDR/VSTR instructions, but the
    1417             :     // updating load/store-multiple instructions can be used with only one
    1418             :     // register.)
    1419          12 :     MachineOperand &MO = MI->getOperand(0);
    1420          36 :     BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    1421          12 :       .addReg(Base, getDefRegState(true)) // WB base register
    1422          24 :       .addReg(Base, getKillRegState(isLd ? BaseKill : false))
    1423          24 :       .addImm(Pred).addReg(PredReg)
    1424          12 :       .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
    1425          36 :                             getKillRegState(MO.isKill())));
    1426          20 :   } else if (isLd) {
    1427           2 :     if (isAM2) {
    1428             :       // LDR_PRE, LDR_POST
    1429           1 :       if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
    1430           0 :         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
    1431           0 :           .addReg(Base, RegState::Define)
    1432           0 :           .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    1433             :       } else {
    1434           1 :         int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
    1435           3 :         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
    1436           1 :             .addReg(Base, RegState::Define)
    1437           1 :             .addReg(Base)
    1438           1 :             .addReg(0)
    1439           1 :             .addImm(Imm)
    1440           2 :             .add(predOps(Pred, PredReg));
    1441             :       }
    1442             :     } else {
    1443             :       // t2LDR_PRE, t2LDR_POST
    1444           3 :       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
    1445           1 :           .addReg(Base, RegState::Define)
    1446           1 :           .addReg(Base)
    1447           1 :           .addImm(Offset)
    1448           2 :           .add(predOps(Pred, PredReg));
    1449             :     }
    1450             :   } else {
    1451          18 :     MachineOperand &MO = MI->getOperand(0);
    1452             :     // FIXME: post-indexed stores use am2offset_imm, which still encodes
    1453             :     // the vestigial zero-reg offset register. When that's fixed, this clause
    1454             :     // can be removed entirely.
    1455          18 :     if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
    1456          13 :       int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
    1457             :       // STR_POST_IMM
    1458          39 :       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
    1459          13 :           .addReg(MO.getReg(), getKillRegState(MO.isKill()))
    1460          13 :           .addReg(Base)
    1461          13 :           .addReg(0)
    1462          13 :           .addImm(Imm)
    1463          26 :           .add(predOps(Pred, PredReg));
    1464             :     } else {
    1465             :       // STR_PRE_IMM, t2STR_PRE, t2STR_POST
    1466          15 :       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
    1467           5 :           .addReg(MO.getReg(), getKillRegState(MO.isKill()))
    1468           5 :           .addReg(Base)
    1469           5 :           .addImm(Offset)
    1470          10 :           .add(predOps(Pred, PredReg));
    1471             :     }
    1472             :   }
    1473          32 :   MBB.erase(MBBI);
    1474             : 
    1475             :   return true;
    1476             : }
    1477             : /// Folds a preceding or trailing +/-8 inc/dec of the base register into a zero-offset t2LDRDi8/t2STRDi8, producing the _PRE or _POST form. Returns true if \p MI was replaced.
    1478         266 : bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
    1479         266 :   unsigned Opcode = MI.getOpcode();
    1480             :   assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
    1481             :          "Must have t2STRDi8 or t2LDRDi8");
    1482         266 :   if (MI.getOperand(3).getImm() != 0)
    1483             :     return false;
    1484             : 
    1485             :   // Behaviour for writeback is undefined if base register is the same as one
    1486             :   // of the others.
    1487             :   const MachineOperand &BaseOp = MI.getOperand(2);
    1488         169 :   unsigned Base = BaseOp.getReg();
    1489             :   const MachineOperand &Reg0Op = MI.getOperand(0);
    1490             :   const MachineOperand &Reg1Op = MI.getOperand(1);
    1491         169 :   if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    1492             :     return false;
    1493             : 
    1494             :   unsigned PredReg;
    1495         146 :   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
    1496             :   MachineBasicBlock::iterator MBBI(MI);
    1497         146 :   MachineBasicBlock &MBB = *MI.getParent();
    1498             :   int Offset;
    1499             :   MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
    1500         146 :                                                             PredReg, Offset);
    1501             :   unsigned NewOpc;
    1502         146 :   if (Offset == 8 || Offset == -8) {
    1503           6 :     NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
    1504             :   } else {
    1505         140 :     MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    1506         140 :     if (Offset == 8 || Offset == -8) {
    1507          21 :       NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    1508             :     } else
    1509             :       return false;
    1510             :   }
    1511          27 :   MBB.erase(MergeInstr);
    1512             :   // The erased inc/dec's signed Offset becomes the immediate of the new instruction.
    1513             :   DebugLoc DL = MI.getDebugLoc();
    1514          54 :   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
    1515          27 :   if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    1516          10 :     MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
    1517             :   } else {
    1518             :     assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    1519          17 :     MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
    1520             :   }
    1521          27 :   MIB.addReg(BaseOp.getReg(), RegState::Kill)
    1522          81 :      .addImm(Offset).addImm(Pred).addReg(PredReg);
    1523             :   assert(TII->get(Opcode).getNumOperands() == 6 &&
    1524             :          TII->get(NewOpc).getNumOperands() == 7 &&
    1525             :          "Unexpected number of operands in Opcode specification.");
    1526             : 
    1527             :   // Transfer implicit operands.
    1528          27 :   for (const MachineOperand &MO : MI.implicit_operands())
    1529             :     MIB.add(MO);
    1530          27 :   MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
    1531             : 
    1532          27 :   MBB.erase(MBBI);
    1533             :   return true;
    1534             : }
    1535             : 
    1536             : /// Returns true if \p MI is one of the single load/store opcodes this pass
    1537             : /// handles and none of the conservative checks below rejects it.
    1538      264759 : static bool isMemoryOp(const MachineInstr &MI) {
    1539      264759 :   unsigned Opcode = MI.getOpcode();
    1540      264759 :   switch (Opcode) {
    1541             :   case ARM::VLDRS:
    1542             :   case ARM::VSTRS:
    1543             :   case ARM::VLDRD:
    1544             :   case ARM::VSTRD:
    1545             :   case ARM::LDRi12:
    1546             :   case ARM::STRi12:
    1547             :   case ARM::tLDRi:
    1548             :   case ARM::tSTRi:
    1549             :   case ARM::tLDRspi:
    1550             :   case ARM::tSTRspi:
    1551             :   case ARM::t2LDRi8:
    1552             :   case ARM::t2LDRi12:
    1553             :   case ARM::t2STRi8:
    1554             :   case ARM::t2STRi12:
    1555             :     break;
    1556             :   default:
    1557             :     return false;
    1558             :   }
    1559       61104 :   if (!MI.getOperand(1).isReg())
    1560             :     return false; // Operand 1 (treated as the address below) must be a register.
    1561             : 
    1562             :   // Require exactly one memory operand; when that information is missing,
    1563             :   // conservatively assume unaligned, volatile, unfoldable.
    1564       24146 :   if (!MI.hasOneMemOperand())
    1565             :     return false;
    1566             : 
    1567       22957 :   const MachineMemOperand &MMO = **MI.memoperands_begin();
    1568             : 
    1569             :   // Don't touch volatile memory accesses - we may be changing their order.
    1570       45914 :   if (MMO.isVolatile())
    1571             :     return false;
    1572             : 
    1573             :   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    1574             :   // not.
    1575       21114 :   if (MMO.getAlignment() < 4)
    1576             :     return false;
    1577             : 
    1578             :   // str <undef> could probably be eliminated entirely, but for now we just want
    1579             :   // to avoid making a mess of it.
    1580             :   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
    1581       62550 :   if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
    1582             :     return false;
    1583             : 
    1584             :   // Likewise don't mess with references to undefined addresses.
    1585       20850 :   if (MI.getOperand(1).isUndef())
    1586             :     return false;
    1587             : 
    1588       20756 :   return true;
    1589             : }
    1590             : /// Builds one \p NewOpc instruction before \p MBBI with operands \p Reg, \p BaseReg, \p Offset and the predicate; \p Reg is a def (dead if \p RegDeadKill) when \p isDef, otherwise a use (killed if \p RegDeadKill, undef if \p RegUndef).
    1591          10 : static void InsertLDR_STR(MachineBasicBlock &MBB,
    1592             :                           MachineBasicBlock::iterator &MBBI, int Offset,
    1593             :                           bool isDef, unsigned NewOpc, unsigned Reg,
    1594             :                           bool RegDeadKill, bool RegUndef, unsigned BaseReg,
    1595             :                           bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
    1596             :                           unsigned PredReg, const TargetInstrInfo *TII) {
    1597          10 :   if (isDef) {
    1598           4 :     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
    1599           2 :                                       TII->get(NewOpc))
    1600           2 :       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
    1601           2 :       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    1602           6 :     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
    1603             :   } else {
    1604          16 :     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
    1605           8 :                                       TII->get(NewOpc))
    1606           8 :       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
    1607           8 :       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    1608          24 :     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
    1609             :   }
    1610          10 : }
    1611             : 
                       /// Rewrite the LDRD / STRD / t2LDRDi8 at \p MBBI if its register pair cannot
                       /// be kept. Two situations trigger a rewrite:
                       ///   - errata 602117: a load (LDRD / t2LDRDi8) whose first register is also
                       ///     the base register, on a subtarget reporting isCortexM3();
                       ///   - an ARM-mode LDRD / STRD whose DWARF register numbers are not an even
                       ///     number followed directly by its successor.
                       /// The instruction becomes a two-register LDMIA / STMIA when the second
                       /// register number is higher than the first and the offset is zero;
                       /// otherwise it is split into two single loads / stores at OffImm and
                       /// OffImm + 4.
                       ///
                       /// \returns true if a replacement was emitted. The original instruction is
                       /// then erased and \p MBBI is set to the iterator MBB.erase() returned.
                       /// Returns false, with nothing modified, in every other case.
    1612      128705 : bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
    1613             :                                           MachineBasicBlock::iterator &MBBI) {
    1614             :   MachineInstr *MI = &*MBBI;
    1615      128705 :   unsigned Opcode = MI->getOpcode();
    1616             :   // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
    1617             :   // if we see this opcode.
    1618      128705 :   if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    1619             :     return false;
    1620             : 
                         // Operand layout relied on below: operands 0 and 1 are the register pair,
                         // operand 2 is the base register.
    1621         153 :   const MachineOperand &BaseOp = MI->getOperand(2);
    1622         153 :   unsigned BaseReg = BaseOp.getReg();
    1623         153 :   unsigned EvenReg = MI->getOperand(0).getReg();
    1624         153 :   unsigned OddReg  = MI->getOperand(1).getReg();
    1625         153 :   unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    1626         153 :   unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    1627             : 
    1628             :   // ARM errata 602117: LDRD with base in list may result in incorrect base
    1629             :   // register when interrupted or faulted.
    1630          43 :   bool Errata602117 = EvenReg == BaseReg &&
    1631         196 :     (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
    1632             :   // ARM LDRD/STRD needs consecutive registers.
    1633         255 :   bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    1634         200 :     (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
    1635             : 
    1636         153 :   if (!Errata602117 && !NonConsecutiveRegs)
    1637             :     return false;
    1638             : 
                         // Capture the operand flags before the instruction is replaced. For a
                         // load the 'dead' flag of each pair operand is recorded, for a store the
                         // 'kill' flag.
    1639          17 :   bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    1640          17 :   bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    1641          17 :   bool EvenDeadKill = isLd ?
    1642          17 :     MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    1643          17 :   bool EvenUndef = MI->getOperand(0).isUndef();
    1644          17 :   bool OddDeadKill  = isLd ?
    1645             :     MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    1646             :   bool OddUndef = MI->getOperand(1).isUndef();
    1647             :   bool BaseKill = BaseOp.isKill();
    1648             :   bool BaseUndef = BaseOp.isUndef();
    1649             :   assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
    1650             :          "register offset not handled below");
    1651          17 :   int OffImm = getMemoryOpOffset(*MI);
    1652          17 :   unsigned PredReg = 0;
    1653          17 :   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
    1654             : 
    1655          17 :   if (OddRegNum > EvenRegNum && OffImm == 0) {
    1656             :     // Ascending register numbers and no offset. It's safe to change it to a
    1657             :     // ldm or stm.
    1658             :     unsigned NewOpc = (isLd)
    1659          12 :       ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
    1660             :       : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    1661          12 :     if (isLd) {
                             // NOTE(review): BaseUndef is not forwarded to the base operand in
                             // either LDM / STM form below -- confirm that is intended.
    1662           6 :       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
    1663           2 :         .addReg(BaseReg, getKillRegState(BaseKill))
    1664           4 :         .addImm(Pred).addReg(PredReg)
    1665           4 :         .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
    1666           2 :         .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
    1667             :       ++NumLDRD2LDM;
    1668             :     } else {
    1669          30 :       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
    1670          10 :         .addReg(BaseReg, getKillRegState(BaseKill))
    1671          20 :         .addImm(Pred).addReg(PredReg)
    1672             :         .addReg(EvenReg,
    1673          20 :                 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
    1674          10 :         .addReg(OddReg,
    1675          10 :                 getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
    1676             :       ++NumSTRD2STM;
    1677             :     }
    1678             :   } else {
    1679             :     // Split into two instructions.
    1680             :     unsigned NewOpc = (isLd)
    1681           5 :       ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
    1682             :       : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    1683             :     // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    1684             :     // so adjust and use t2LDRi12 here for that.
    1685             :     unsigned NewOpc2 = (isLd)
    1686           5 :       ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
    1687             :       : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    1688             :     // If this is a load, make sure the first load does not clobber the base
    1689             :     // register before the second load reads it.
    1690           5 :     if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
    1691             :       assert(!TRI->regsOverlap(OddReg, BaseReg));
                             // Emit the access for the odd register (at OffImm + 4) first, then
                             // the one for the even register that overlaps the base.
    1692           1 :       InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
    1693             :                     false, BaseReg, false, BaseUndef, Pred, PredReg, TII);
    1694           1 :       InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
    1695             :                     false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
    1696             :     } else {
    1697           4 :       if (OddReg == EvenReg && EvenDeadKill) {
    1698             :         // If the two source operands are the same, the kill marker is
    1699             :         // probably on the first one. e.g.
    1700             :         // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
    1701             :         EvenDeadKill = false;
    1702             :         OddDeadKill = true;
    1703             :       }
    1704             :       // Never kill the base register in the first instruction.
    1705           4 :       if (EvenReg == BaseReg)
    1706             :         EvenDeadKill = false;
    1707           4 :       InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
    1708             :                     EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII);
    1709           4 :       InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
    1710             :                     OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
    1711             :     }
    1712             :     if (isLd)
    1713             :       ++NumLDRD2LDR;
    1714             :     else
    1715             :       ++NumSTRD2STR;
    1716             :   }
    1717             : 
                         // The replacement was built at MBBI; now remove the original paired
                         // instruction and hand the iterator returned by erase() back to the caller.
    1718          17 :   MBBI = MBB.erase(MBBI);
    1719          17 :   return true;
    1720             : }
    1721             : 
    1722             : /// An optimization pass to turn multiple LDR / STR ops of the same base and
    1723             : /// incrementing offset into LDM / STM ops.
                       ///
                       /// The block is scanned backwards (last instruction first). Memory ops that
                       /// share opcode, base register and predicate are collected into MemOps,
                       /// which is kept sorted by ascending offset. Debug instructions are skipped;
                       /// any other instruction, or a memory op that cannot join the current chain,
                       /// ends the chain and hands it to FormCandidates(). The collected Candidates
                       /// are then sorted by InsertPos and merged via MergeOpsUpdate() and the
                       /// MergeBaseUpdate* helpers.
                       ///
                       /// \returns true if MergeOpsUpdate() produced a merged instruction or
                       /// MergeBaseUpdateLoadStore() reported a change.
                       /// NOTE(review): rewrites made by FixInvalidRegPairOp() and whatever the
                       /// final MergeBaseUpdateLSDouble() loop over MergeBaseCandidates does are not
                       /// folded into the returned flag -- confirm this is intended.
    1724       17267 : bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
    1725             :   MemOpQueue MemOps;
    1726             :   unsigned CurrBase = 0;
    1727             :   unsigned CurrOpc = ~0u;
    1728             :   ARMCC::CondCodes CurrPred = ARMCC::AL;
    1729             :   unsigned Position = 0;
    1730             :   assert(Candidates.size() == 0);
    1731             :   assert(MergeBaseCandidates.size() == 0);
    1732       17267 :   LiveRegsValid = false;
    1733             : 
    1734      274677 :   for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
    1735      128705 :        I = MBBI) {
    1736             :     // The instruction in front of the iterator is the one we look at.
    1737      128705 :     MBBI = std::prev(I);
                           // If the pair op was rewritten, MBBI now holds the iterator returned by
                           // the callee's erase(); resume the scan from there without counting a
                           // position for it.
    1738      128705 :     if (FixInvalidRegPairOp(MBB, MBBI))
    1739          17 :       continue;
    1740      128688 :     ++Position;
    1741             : 
    1742      128688 :     if (isMemoryOp(*MBBI)) {
    1743       14598 :       unsigned Opcode = MBBI->getOpcode();
    1744       14598 :       const MachineOperand &MO = MBBI->getOperand(0);
    1745       14598 :       unsigned Reg = MO.getReg();
    1746       14598 :       unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
    1747       14598 :       unsigned PredReg = 0;
    1748       14598 :       ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
    1749       14598 :       int Offset = getMemoryOpOffset(*MBBI);
                             // CurrBase == 0 doubles as the "no chain in progress" marker.
    1750       23570 :       if (CurrBase == 0) {
    1751             :         // Start of a new chain.
    1752             :         CurrBase = Base;
    1753             :         CurrOpc  = Opcode;
    1754             :         CurrPred = Pred;
    1755        8972 :         MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
    1756       20872 :         continue;
    1757             :       }
    1758             :       // Note: No need to match PredReg in the next if.
    1759        5626 :       if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
    1760             :         // Watch out for:
    1761             :         //   r4 := ldr [r0, #8]
    1762             :         //   r4 := ldr [r0, #4]
    1763             :         // or
    1764             :         //   r0 := ldr [r0]
    1765             :         // If a load overrides the base register or a register loaded by
    1766             :         // another load in our chain, we cannot take this instruction.
    1767             :         bool Overlap = false;
    1768             :         if (isLoadSingle(Opcode)) {
    1769        1985 :           Overlap = (Base == Reg);
    1770        1985 :           if (!Overlap) {
    1771        9964 :             for (const MemOpQueueEntry &E : MemOps) {
    1772        4034 :               if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
    1773             :                 Overlap = true;
    1774             :                 break;
    1775             :               }
    1776             :             }
    1777             :           }
    1778             :         }
    1779             : 
    1780        1985 :         if (!Overlap) {
    1781             :           // Check offset and sort memory operation into the current chain.
    1782        3904 :           if (Offset > MemOps.back().Offset) {
    1783         975 :             MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
    1784         975 :             continue;
    1785             :           } else {
                                 // Linear search for the first queued entry with a larger
                                 // offset; MI == end() afterwards means "do not insert".
    1786             :             MemOpQueue::iterator MI, ME;
    1787        2410 :             for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
    1788        2182 :               if (Offset < MI->Offset) {
    1789             :                 // Found a place to insert.
    1790             :                 break;
    1791             :               }
    1792         229 :               if (Offset == MI->Offset) {
    1793             :                 // Collision, abort.
    1794             :                 MI = ME;
    1795             :                 break;
    1796             :               }
    1797             :             }
    1798        3907 :             if (MI != MemOps.end()) {
    1799        1953 :               MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
    1800        1953 :               continue;
    1801             :             }
    1802             :           }
    1803             :         }
    1804             :       }
    1805             : 
    1806             :       // Don't advance the iterator; The op will start a new chain next.
    1807        2698 :       MBBI = I;
    1808             :       --Position;
    1809             :       // Fallthrough to look into existing chain.
    1810         130 :     } else if (MBBI->isDebugInstr()) {
    1811         130 :       continue;
    1812      227871 :     } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
    1813             :                MBBI->getOpcode() == ARM::t2STRDi8) {
    1814             :       // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
    1815             :       // remember them because we may still be able to merge add/sub into them.
    1816         128 :       MergeBaseCandidates.push_back(&*MBBI);
    1817             :     }
    1818             : 
    1819             :     // If we are here then the chain is broken; Extract candidates for a merge.
    1820      116658 :     if (MemOps.size() > 0) {
    1821        7584 :       FormCandidates(MemOps);
    1822             :       // Reset for the next chain.
    1823             :       CurrBase = 0;
    1824             :       CurrOpc = ~0u;
    1825             :       CurrPred = ARMCC::AL;
    1826             :       MemOps.clear();
    1827             :     }
    1828             :   }
                         // A chain still open when the top of the block is reached is flushed too.
    1829       17267 :   if (MemOps.size() > 0)
    1830        1388 :     FormCandidates(MemOps);
    1831             : 
    1832             :   // Sort candidates so they get processed from end to begin of the basic
    1833             :   // block later; This is necessary for liveness calculation.
    1834             :   auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    1835             :     return M0->InsertPos < M1->InsertPos;
    1836             :   };
    1837             :   llvm::sort(Candidates.begin(), Candidates.end(), LessThan);
    1838             : 
    1839             :   // Go through list of candidates and merge.
    1840             :   bool Changed = false;
    1841       36835 :   for (const MergeCandidate *Candidate : Candidates) {
    1842        9784 :     if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
    1843         965 :       MachineInstr *Merged = MergeOpsUpdate(*Candidate);
    1844             :       // Merge preceding/trailing base inc/dec into the merged op.
    1845         965 :       if (Merged) {
    1846             :         Changed = true;
    1847         740 :         unsigned Opcode = Merged->getOpcode();
    1848         740 :         if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
    1849         138 :           MergeBaseUpdateLSDouble(*Merged);
    1850             :         else
    1851         602 :           MergeBaseUpdateLSMultiple(Merged);
    1852             :       } else {
                               // The merge did not happen; still try to fold a base update into
                               // each instruction of the candidate individually.
    1853        1397 :         for (MachineInstr *MI : Candidate->Instrs) {
    1854         586 :           if (MergeBaseUpdateLoadStore(MI))
    1855             :             Changed = true;
    1856             :         }
    1857             :       }
    1858             :     } else {
    1859             :       assert(Candidate->Instrs.size() == 1);
    1860        8819 :       if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
    1861             :         Changed = true;
    1862             :     }
    1863             :   }
    1864             :   Candidates.clear();
    1865             :   // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
    1866       17523 :   for (MachineInstr *MI : MergeBaseCandidates)
    1867         128 :     MergeBaseUpdateLSDouble(*MI);
    1868             :   MergeBaseCandidates.clear();
    1869             : 
    1870       17267 :   return Changed;
    1871             : }
    1872             : 
    1873             : /// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
    1874             : /// into the preceding stack restore so it directly restores the value of LR
    1875             : /// into pc.
    1876             : ///   ldmfd sp!, {..., lr}
    1877             : ///   bx lr
    1878             : /// or
    1879             : ///   ldmfd sp!, {..., lr}
    1880             : ///   mov pc, lr
    1881             : /// =>
    1882             : ///   ldmfd sp!, {..., pc}
                       ///
                       /// \returns true if the return was folded into the preceding load-multiple;
                       /// false otherwise (always false for Thumb1 and for an empty block).
    1883       12916 : bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
    1884             :   // Thumb1 LDM doesn't allow high registers.
    1885       12916 :   if (isThumb1) return false;
    1886       11453 :   if (MBB.empty()) return false;
    1887             : 
    1888       11412 :   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
    1889       31901 :   if (MBBI != MBB.begin() && MBBI != MBB.end() &&
    1890       17838 :       (MBBI->getOpcode() == ARM::BX_RET ||
    1891        4698 :        MBBI->getOpcode() == ARM::tBX_RET ||
    1892             :        MBBI->getOpcode() == ARM::MOVPCLR)) {
    1893        5546 :     MachineBasicBlock::iterator PrevI = std::prev(MBBI);
    1894             :     // Ignore any debug instructions.
    1895           7 :     while (PrevI->isDebugInstr() && PrevI != MBB.begin())
    1896             :       --PrevI;
    1897             :     MachineInstr &PrevMI = *PrevI;
    1898             :     unsigned Opcode = PrevMI.getOpcode();
    1899        5546 :     if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
    1900        5538 :         Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
    1901        5538 :         Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
                             // Only fold when LR is the very last operand of the load-multiple.
    1902          24 :       MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
    1903          24 :       if (MO.getReg() != ARM::LR)
    1904          24 :         return false;
    1905          20 :       unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
                             // NOTE(review): the opcode test above also admits LDMDA_UPD,
                             // LDMDB_UPD, LDMIB_UPD and t2LDMDB_UPD, which only this assert
                             // rejects; with asserts compiled out they would be rewritten to an
                             // LDMIA_RET form as well -- confirm they cannot reach here.
    1906             :       assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
    1907             :               Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
                             // Turn the load-multiple into its returning form in place: switch the
                             // descriptor, load into PC instead of LR, take over the return's
                             // implicit operands, then delete the return itself.
    1908          20 :       PrevMI.setDesc(TII->get(NewOpc));
    1909          20 :       MO.setReg(ARM::PC);
    1910          20 :       PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
    1911          20 :       MBB.erase(MBBI);
    1912             :       // We now restore LR into PC so it is not live-out of the return block
    1913             :       // anymore: Clear the CSI Restored bit.
    1914          20 :       MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
    1915             :       // CSI should be fixed after PrologEpilog Insertion
    1916             :       assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
    1917          20 :       for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
    1918          20 :         if (Info.getReg() == ARM::LR) {
    1919             :           Info.setRestored(false);
    1920             :           break;
    1921             :         }
    1922             :       }
    1923             :       return true;
    1924             :     }
    1925             :   }
    1926             :   return false;
    1927             : }
    1928             : 
                       /// Fold a tMOVr that defines LR, immediately followed by a tBX_RET, into a
                       /// single tBX through the register that the tMOVr kills. Applies only when
                       /// the block's first terminator is the tBX_RET and it is not the first
                       /// instruction of the block.
                       ///
                       /// \returns true if the two instructions were replaced by the tBX, false if
                       /// the pattern was not found.
    1929        2038 : bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
    1930        2038 :   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
    1931        5528 :   if (MBBI == MBB.begin() || MBBI == MBB.end() ||
    1932        1592 :       MBBI->getOpcode() != ARM::tBX_RET)
    1933             :     return false;
    1934             : 
                         // NOTE(review): this steps back exactly one instruction and does not skip
                         // debug instructions -- confirm that is acceptable here.
    1935         667 :   MachineBasicBlock::iterator Prev = MBBI;
    1936             :   --Prev;
    1937        1518 :   if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
    1938             :     return false;
    1939             : 
                         // The branch target is the first use operand of the move that carries a
                         // kill flag; finding none is treated as unreachable below.
                         // NOTE(review): 'auto Use' copies each operand; a const reference would
                         // avoid the copy.
    1940         175 :   for (auto Use : Prev->uses())
    1941         175 :     if (Use.isKill()) {
    1942             :       assert(STI->hasV4TOps());
    1943         525 :       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
    1944         175 :           .addReg(Use.getReg(), RegState::Kill)
    1945         175 :           .add(predOps(ARMCC::AL))
    1946             :           .copyImplicitOps(*MBBI);
    1947         175 :       MBB.erase(MBBI);
    1948         175 :       MBB.erase(Prev);
    1949         175 :       return true;
    1950             :     }
    1951             : 
    1952           0 :   llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
    1953             : }
    1954             : 
                       /// Pass entry point: caches the subtarget-derived state for \p Fn and runs
                       /// the per-block transformations on every basic block.
                       ///
                       /// \returns true if any per-block transformation reported a change; false
                       /// if nothing changed or skipFunction() asked to skip \p Fn.
    1955       12527 : bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
    1956       12527 :   if (skipFunction(Fn.getFunction()))
    1957             :     return false;
    1958             : 
    1959       12519 :   MF = &Fn;
    1960       12519 :   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
    1961       12519 :   TL = STI->getTargetLowering();
    1962       12519 :   AFI = Fn.getInfo<ARMFunctionInfo>();
    1963       12519 :   TII = STI->getInstrInfo();
    1964       12519 :   TRI = STI->getRegisterInfo();
    1965             : 
    1966       12519 :   RegClassInfoValid = false;
    1967       25038 :   isThumb2 = AFI->isThumb2Function();
                         // "Thumb1" here means a Thumb function that is not Thumb2.
    1968       12519 :   isThumb1 = AFI->isThumbFunction() && !isThumb2;
    1969             : 
    1970             :   bool Modified = false;
    1971       29786 :   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    1972             :        ++MFI) {
    1973             :     MachineBasicBlock &MBB = *MFI;
    1974       17267 :     Modified |= LoadStoreMultipleOpti(MBB);
                           // Folding the return into the LDM is only attempted with v5T ops;
                           // the mov+bx combine only for Thumb1 functions.
    1975       17267 :     if (STI->hasV5TOps())
    1976       12916 :       Modified |= MergeReturnIntoLDM(MBB);
    1977       17267 :     if (isThumb1)
    1978        2038 :       Modified |= CombineMovBx(MBB);
    1979             :   }
    1980             : 
                         // Per-function cleanup of the pass's Allocator.
    1981       12519 :   Allocator.DestroyAll();
    1982       12519 :   return Modified;
    1983             : }
    1984             : 
                       // Description string of the pre-register-allocation pass; returned by
                       // ARMPreAllocLoadStoreOpt::getPassName() and passed to INITIALIZE_PASS.
    1985             : #define ARM_PREALLOC_LOAD_STORE_OPT_NAME                                       \
    1986             :   "ARM pre- register allocation load / store optimization pass"
    1987             : 
    1988             : namespace {
    1989             : 
    1990             :   /// Pre- register allocation pass that moves loads / stores of consecutive
    1991             :   /// locations close together to make it more likely they will be combined later.
    1992        2452 :   struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    1993             :     static char ID;
    1994             : 
                           // Per-function state; every member below is (re)assigned at the top of
                           // runOnMachineFunction() before any block is processed.
    1995             :     AliasAnalysis *AA;
    1996             :     const DataLayout *TD;
    1997             :     const TargetInstrInfo *TII;
    1998             :     const TargetRegisterInfo *TRI;
    1999             :     const ARMSubtarget *STI;
    2000             :     MachineRegisterInfo *MRI;
    2001             :     MachineFunction *MF;
    2002             : 
    2003        2479 :     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
    2004             : 
    2005             :     bool runOnMachineFunction(MachineFunction &Fn) override;
    2006             : 
    2007        2467 :     StringRef getPassName() const override {
    2008        2467 :       return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
    2009             :     }
    2010             : 
                           // Requires AAResultsWrapperPass on top of whatever
                           // MachineFunctionPass::getAnalysisUsage() adds.
    2011        2467 :     void getAnalysisUsage(AnalysisUsage &AU) const override {
    2012             :       AU.addRequired<AAResultsWrapperPass>();
    2013        2467 :       MachineFunctionPass::getAnalysisUsage(AU);
    2014        2467 :     }
    2015             : 
    2016             :   private:
                           // Checks whether Op0 / Op1 can be combined into one LDRD / STRD and, if
                           // so, fills in the reference parameters describing the new instruction.
                           // NOTE(review): the out-of-line definition names the two register
                           // parameters FirstReg / SecondReg rather than EvenReg / OddReg.
    2017             :     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
    2018             :                           unsigned &NewOpc, unsigned &EvenReg,
    2019             :                           unsigned &OddReg, unsigned &BaseReg,
    2020             :                           int &Offset,
    2021             :                           unsigned &PredReg, ARMCC::CondCodes &Pred,
    2022             :                           bool &isT2);
    2023             :     bool RescheduleOps(MachineBasicBlock *MBB,
    2024             :                        SmallVectorImpl<MachineInstr *> &Ops,
    2025             :                        unsigned Base, bool isLd,
    2026             :                        DenseMap<MachineInstr*, unsigned> &MI2LocMap);
                           // Per-block driver, called once for every basic block by
                           // runOnMachineFunction().
    2027             :     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
    2028             :   };
    2029             : 
    2030             : } // end anonymous namespace
    2031             : 
                       // Definition of the static pass identifier that the constructor hands to
                       // MachineFunctionPass.
    2032             : char ARMPreAllocLoadStoreOpt::ID = 0;
    2033             : 
                       // Pass registration: "arm-prera-ldst-opt" is the pass's short name and
                       // ARM_PREALLOC_LOAD_STORE_OPT_NAME its description string. See the
                       // INITIALIZE_PASS macro for the meaning of the two boolean arguments.
    2034      342570 : INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
    2035             :                 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
    2036             : 
                       /// Pass entry point of the pre-register-allocation pass. Does nothing when
                       /// AssumeMisalignedLoadStores is set or skipFunction() asks to skip \p Fn;
                       /// otherwise caches the per-function state and runs
                       /// RescheduleLoadStoreInstrs() on every basic block.
                       ///
                       /// \returns true if any block was reported as modified.
    2037       12527 : bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
    2038       12527 :   if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
    2039             :     return false;
    2040             : 
    2041       12500 :   TD = &Fn.getDataLayout();
    2042       12500 :   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
    2043       12500 :   TII = STI->getInstrInfo();
    2044       12500 :   TRI = STI->getRegisterInfo();
    2045       12500 :   MRI = &Fn.getRegInfo();
    2046       12500 :   MF  = &Fn;
    2047       25000 :   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    2048             : 
    2049             :   bool Modified = false;
                         // Note: despite its name, MFI is the current MachineBasicBlock here.
    2050       29936 :   for (MachineBasicBlock &MFI : Fn)
    2051       17436 :     Modified |= RescheduleLoadStoreInstrs(&MFI);
    2052             : 
    2053             :   return Modified;
    2054             : }
    2055             : 
                       /// Decide whether the memory operations in \p MemOps may be moved across the
                       /// instructions lying strictly between \p I and \p E (the scan starts at the
                       /// instruction after \p I and stops before \p E).
                       ///
                       /// \param isLd    when false, intervening loads are alias-checked in addition
                       ///                to intervening stores (presumably: false means the ops
                       ///                being moved are stores -- confirm against the caller).
                       /// \param Base    register that no intervening instruction may define (or
                       ///                overlap with a def).
                       /// \param MemOps  instructions skipped during the scan and used as the other
                       ///                side of the alias queries.
                       /// \param MemRegs registers that are not counted towards the pressure
                       ///                estimate; its size also sets the pressure threshold.
                       ///
                       /// \returns false if an intervening instruction is a call, a terminator, has
                       /// unmodeled side effects, may alias one of \p MemOps, or defines a register
                       /// overlapping \p Base. Otherwise true when \p MemRegs holds at most four
                       /// registers, or when the number of other distinct registers seen is at most
                       /// twice the size of \p MemRegs.
    2056         675 : static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
    2057             :                                       MachineBasicBlock::iterator I,
    2058             :                                       MachineBasicBlock::iterator E,
    2059             :                                       SmallPtrSetImpl<MachineInstr*> &MemOps,
    2060             :                                       SmallSet<unsigned, 4> &MemRegs,
    2061             :                                       const TargetRegisterInfo *TRI,
    2062             :                                       AliasAnalysis *AA) {
    2063             :   // Are there stores / loads / calls between them?
    2064         675 :   SmallSet<unsigned, 4> AddedRegPressure;
    2065        1691 :   while (++I != E) {
    2066        1789 :     if (I->isDebugInstr() || MemOps.count(&*I))
    2067         746 :       continue;
    2068         894 :     if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
    2069             :       return false;
                           // An intervening store always needs an alias check; an intervening load
                           // only when isLd is false.
    2070         418 :     if (I->mayStore() || (!isLd && I->mayLoad()))
    2071          37 :       for (MachineInstr *MemOp : MemOps)
    2072          84 :         if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
    2073          20 :           return false;
    2074        1664 :     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
    2075        1394 :       MachineOperand &MO = I->getOperand(j);
    2076        1394 :       if (!MO.isReg())
    2077         414 :         continue;
    2078         980 :       unsigned Reg = MO.getReg();
    2079         980 :       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
    2080           0 :         return false;
                             // Every register operand (def or use) other than Base and the
                             // MemRegs members counts towards the pressure estimate.
    2081        1938 :       if (Reg != Base && !MemRegs.count(Reg))
    2082         752 :         AddedRegPressure.insert(Reg);
    2083             :     }
    2084             :   }
    2085             : 
    2086             :   // Estimate register pressure increase due to the transformation.
    2087         647 :   if (MemRegs.size() <= 4)
    2088             :     // Ok if we are moving small number of instructions.
    2089             :     return true;
    2090          81 :   return AddedRegPressure.size() <= MemRegs.size() * 2;
    2091             : }
    2092             : 
                       /// Check whether \p Op0 and \p Op1 can be replaced by a single LDRD / STRD
                       /// (ARM) or t2LDRDi8 / t2STRDi8 (Thumb2) and describe that instruction
                       /// through the reference parameters.
                       ///
                       /// Opcode, memory operand, offset, base register, predicate and debug
                       /// location are all taken from \p Op0; \p Op1 only supplies \p SecondReg.
                       ///
                       /// \param [out] dl        debug location of \p Op0.
                       /// \param [out] NewOpc    opcode of the paired instruction.
                       /// \param [out] FirstReg  operand 0 of \p Op0.
                       /// \param [out] SecondReg operand 0 of \p Op1.
                       /// \param [out] BaseReg   operand 1 of \p Op0.
                       /// \param [out] Offset    for Thumb2 the plain offset of \p Op0; for ARM the
                       ///                        value built by ARM_AM::getAM3Opc() from the
                       ///                        add / sub direction and the absolute offset.
                       /// \param [out] PredReg,Pred predicate of \p Op0.
                       /// \param [in,out] isT2   set to true for the Thumb2 opcodes and never
                       ///                        written otherwise, so the caller has to
                       ///                        initialise it.
                       ///
                       /// \returns true if the pair can be formed. On a false return some of the
                       /// output parameters may already have been written.
    2093             : bool
    2094         377 : ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
    2095             :                                           DebugLoc &dl, unsigned &NewOpc,
    2096             :                                           unsigned &FirstReg,
    2097             :                                           unsigned &SecondReg,
    2098             :                                           unsigned &BaseReg, int &Offset,
    2099             :                                           unsigned &PredReg,
    2100             :                                           ARMCC::CondCodes &Pred,
    2101             :                                           bool &isT2) {
    2102             :   // Make sure we're allowed to generate LDRD/STRD.
    2103         377 :   if (!STI->hasV5TEOps())
    2104             :     return false;
    2105             : 
    2106             :   // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
                         // Scale is the required offset multiple (and range multiplier) used in the
                         // immediate check below: 1 for the ARM opcodes, 4 for the Thumb2 ones.
    2107             :   unsigned Scale = 1;
    2108         323 :   unsigned Opcode = Op0->getOpcode();
    2109         323 :   if (Opcode == ARM::LDRi12) {
    2110          37 :     NewOpc = ARM::LDRD;
    2111         286 :   } else if (Opcode == ARM::STRi12) {
    2112          67 :     NewOpc = ARM::STRD;
    2113         219 :   } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    2114          58 :     NewOpc = ARM::t2LDRDi8;
    2115             :     Scale = 4;
    2116          58 :     isT2 = true;
    2117         161 :   } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    2118          96 :     NewOpc = ARM::t2STRDi8;
    2119             :     Scale = 4;
    2120          96 :     isT2 = true;
    2121             :   } else {
    2122             :     return false;
    2123             :   }
    2124             : 
    2125             :   // Make sure the base address satisfies i64 ld / st alignment requirement.
    2126             :   // At the moment, we ignore the memoryoperand's value.
    2127             :   // If we want to use AliasAnalysis, we should check it accordingly.
    2128         516 :   if (!Op0->hasOneMemOperand() ||
    2129         258 :       (*Op0->memoperands_begin())->isVolatile())
    2130             :     return false;
    2131             : 
    2132         258 :   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
    2133         258 :   const Function &Func = MF->getFunction();
    2134         258 :   unsigned ReqAlign = STI->hasV6Ops()
    2135         258 :     ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext()))
    2136             :     : 8;  // Pre-v6 need 8-byte align
    2137         258 :   if (Align < ReqAlign)
    2138             :     return false;
    2139             : 
    2140             :   // Then make sure the immediate offset fits.
    2141         229 :   int OffImm = getMemoryOpOffset(*Op0);
    2142         229 :   if (isT2) {
                           // Thumb2: offset must lie strictly inside (-1024, 1024) and be a
                           // multiple of 4 (Scale is 4 on this path).
    2143         131 :     int Limit = (1 << 8) * Scale;
    2144         131 :     if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
    2145             :       return false;
    2146         130 :     Offset = OffImm;
    2147             :   } else {
                           // ARM: split the offset into a direction and a magnitude, which must
                           // be below 256 (Scale is 1 on this path, so the mask test is a no-op).
    2148             :     ARM_AM::AddrOpc AddSub = ARM_AM::add;
    2149          98 :     if (OffImm < 0) {
    2150             :       AddSub = ARM_AM::sub;
    2151           0 :       OffImm = - OffImm;
    2152             :     }
    2153          98 :     int Limit = (1 << 8) * Scale;
    2154          98 :     if (OffImm >= Limit || (OffImm & (Scale-1)))
    2155             :       return false;
    2156          98 :     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
    2157             :   }
    2158         228 :   FirstReg = Op0->getOperand(0).getReg();
    2159         228 :   SecondReg = Op1->getOperand(0).getReg();
                         // A pair naming the same register twice is rejected.
    2160         228 :   if (FirstReg == SecondReg)
    2161             :     return false;
    2162         223 :   BaseReg = Op0->getOperand(1).getReg();
    2163         223 :   Pred = getInstrPredicate(*Op0, PredReg);
    2164             :   dl = Op0->getDebugLoc();
    2165         223 :   return true;
    2166             : }
    2167             : 
    2168         658 : bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
    2169             :                                  SmallVectorImpl<MachineInstr *> &Ops,
    2170             :                                  unsigned Base, bool isLd,
    2171             :                                  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
    2172             :   bool RetVal = false;
    2173             : 
    2174             :   // Sort by offset (in reverse order).
    2175             :   llvm::sort(Ops.begin(), Ops.end(),
    2176             :              [](const MachineInstr *LHS, const MachineInstr *RHS) {
    2177        3704 :                int LOffset = getMemoryOpOffset(*LHS);
    2178        3721 :                int ROffset = getMemoryOpOffset(*RHS);
    2179             :                assert(LHS == RHS || LOffset != ROffset);
    2180             :                return LOffset > ROffset;
    2181             :              });
    2182             : 
    2183             :   // The loads / stores of the same base are in order. Scan them from first to
    2184             :   // last and check for the following:
    2185             :   // 1. Any def of base.
    2186             :   // 2. Any gaps.
    2187        1384 :   while (Ops.size() > 1) {
    2188             :     unsigned FirstLoc = ~0U;
    2189             :     unsigned LastLoc = 0;
    2190             :     MachineInstr *FirstOp = nullptr;
    2191             :     MachineInstr *LastOp = nullptr;
    2192             :     int LastOffset = 0;
    2193             :     unsigned LastOpcode = 0;
    2194             :     unsigned LastBytes = 0;
    2195             :     unsigned NumMove = 0;
    2196        2911 :     for (int i = Ops.size() - 1; i >= 0; --i) {
    2197             :       // Make sure each operation has the same kind.
    2198        4578 :       MachineInstr *Op = Ops[i];
    2199             :       unsigned LSMOpcode
    2200        4578 :         = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
    2201        2289 :       if (LastOpcode && LSMOpcode != LastOpcode)
    2202             :         break;
    2203             : 
    2204             :       // Check that we have a continuous set of offsets.
    2205        2286 :       int Offset = getMemoryOpOffset(*Op);
    2206        2286 :       unsigned Bytes = getLSMultipleTransferSize(Op);
    2207        2286 :       if (LastBytes) {
    2208        1560 :         if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
    2209             :           break;
    2210             :       }
    2211             : 
    2212             :       // Don't try to reschedule too many instructions.
    2213        2224 :       if (NumMove == 8) // FIXME: Tune this limit.
    2214             :         break;
    2215             : 
    2216             :       // Found a mergeable instruction; save information about it.
    2217        2185 :       ++NumMove;
    2218             :       LastOffset = Offset;
    2219             :       LastBytes = Bytes;
    2220             :       LastOpcode = LSMOpcode;
    2221             : 
    2222        2185 :       unsigned Loc = MI2LocMap[Op];
    2223        2185 :       if (Loc <= FirstLoc) {
    2224             :         FirstLoc = Loc;
    2225        1524 :         FirstOp = Op;
    2226             :       }
    2227        2185 :       if (Loc >= LastLoc) {
    2228             :         LastLoc = Loc;
    2229        1358 :         LastOp = Op;
    2230             :       }
    2231             :     }
    2232             : 
    2233         726 :     if (NumMove <= 1)
    2234             :       Ops.pop_back();
    2235             :     else {
    2236             :       SmallPtrSet<MachineInstr*, 4> MemOps;
    2237         684 :       SmallSet<unsigned, 4> MemRegs;
    2238        2827 :       for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
    2239        2143 :         MemOps.insert(Ops[i]);
    2240        2143 :         MemRegs.insert(Ops[i]->getOperand(0).getReg());
    2241             :       }
    2242             : 
    2243             :       // Be conservative, if the instructions are too far apart, don't
    2244             :       // move them. We want to limit the increase of register pressure.
    2245         684 :       bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
    2246         684 :       if (DoMove)
    2247        1350 :         DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
    2248             :                                            MemOps, MemRegs, TRI, AA);
    2249         684 :       if (!DoMove) {
    2250         287 :         for (unsigned i = 0; i != NumMove; ++i)
    2251             :           Ops.pop_back();
    2252             :       } else {
    2253             :         // This is the new location for the loads / stores.
    2254         645 :         MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
    2255        3285 :         while (InsertPos != MBB->end() &&
    2256        1639 :                (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
    2257             :           ++InsertPos;
    2258             : 
    2259             :         // If we are moving a pair of loads / stores, see if it makes sense
    2260             :         // to try to allocate a pair of registers that can form register pairs.
    2261         645 :         MachineInstr *Op0 = Ops.back();
    2262        1290 :         MachineInstr *Op1 = Ops[Ops.size()-2];
    2263         645 :         unsigned FirstReg = 0, SecondReg = 0;
    2264         645 :         unsigned BaseReg = 0, PredReg = 0;
    2265         645 :         ARMCC::CondCodes Pred = ARMCC::AL;
    2266         645 :         bool isT2 = false;
    2267         645 :         unsigned NewOpc = 0;
    2268         645 :         int Offset = 0;
    2269         645 :         DebugLoc dl;
    2270         645 :         if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
    2271             :                                              FirstReg, SecondReg, BaseReg,
    2272             :                                              Offset, PredReg, Pred, isT2)) {
    2273             :           Ops.pop_back();
    2274             :           Ops.pop_back();
    2275             : 
    2276         223 :           const MCInstrDesc &MCID = TII->get(NewOpc);
    2277         223 :           const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
    2278         223 :           MRI->constrainRegClass(FirstReg, TRC);
    2279         223 :           MRI->constrainRegClass(SecondReg, TRC);
    2280             : 
    2281             :           // Form the pair instruction.
    2282         223 :           if (isLd) {
    2283         166 :             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
    2284          83 :               .addReg(FirstReg, RegState::Define)
    2285         166 :               .addReg(SecondReg, RegState::Define)
    2286         166 :               .addReg(BaseReg);
    2287             :             // FIXME: We're converting from LDRi12 to an insn that still
    2288             :             // uses addrmode2, so we need an explicit offset reg. It should
    2289             :             // always be reg0 since we're transforming LDRi12s.
    2290          83 :             if (!isT2)
    2291          34 :               MIB.addReg(0);
    2292         249 :             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
    2293          83 :             MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
    2294             :             LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
    2295             :             ++NumLDRDFormed;
    2296             :           } else {
    2297         280 :             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
    2298         140 :               .addReg(FirstReg)
    2299         280 :               .addReg(SecondReg)
    2300         280 :               .addReg(BaseReg);
    2301             :             // FIXME: We're converting from STRi12 to an insn that still
    2302             :             // uses addrmode2, so we need an explicit offset reg. It should
    2303             :             // always be reg0 since we're transforming STRi12s.
    2304         140 :             if (!isT2)
    2305          63 :               MIB.addReg(0);
    2306         420 :             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
    2307         140 :             MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
    2308             :             LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
    2309             :             ++NumSTRDFormed;
    2310             :           }
    2311             :           MBB->erase(Op0);
    2312             :           MBB->erase(Op1);
    2313             : 
    2314         223 :           if (!isT2) {
    2315             :             // Add register allocation hints to form register pairs.
    2316          97 :             MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
    2317          97 :             MRI->setRegAllocationHint(SecondReg,  ARMRI::RegPairOdd, FirstReg);
    2318             :           }
    2319             :         } else {
    2320        3568 :           for (unsigned i = 0; i != NumMove; ++i) {
    2321        1573 :             MachineInstr *Op = Ops.back();
    2322             :             Ops.pop_back();
    2323        1573 :             MBB->splice(InsertPos, MBB, Op);
    2324             :           }
    2325             :         }
    2326             : 
    2327             :         NumLdStMoved += NumMove;
    2328             :         RetVal = true;
    2329             :       }
    2330             :     }
    2331             :   }
    2332             : 
    2333         658 :   return RetVal;
    2334             : }
    2335             : 
    2336             : bool
    2337       17436 : ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
    2338             :   bool RetVal = false;
    2339             : 
    2340             :   DenseMap<MachineInstr*, unsigned> MI2LocMap;
    2341             :   DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2LdsMap;
    2342             :   DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2StsMap;
    2343             :   SmallVector<unsigned, 4> LdBases;
    2344             :   SmallVector<unsigned, 4> StBases;
    2345             : 
    2346             :   unsigned Loc = 0;
    2347       17436 :   MachineBasicBlock::iterator MBBI = MBB->begin();
    2348             :   MachineBasicBlock::iterator E = MBB->end();
    2349       42504 :   while (MBBI != E) {
    2350      161116 :     for (; MBBI != E; ++MBBI) {
    2351             :       MachineInstr &MI = *MBBI;
    2352      312734 :       if (MI.isCall() || MI.isTerminator()) {
    2353             :         // Stop at barriers.
    2354             :         ++MBBI;
    2355       23711 :         break;
    2356             :       }
    2357             : 
    2358             :       if (!MI.isDebugInstr())
    2359      271890 :         MI2LocMap[&MI] = ++Loc;
    2360             : 
    2361      136071 :       if (!isMemoryOp(MI))
    2362      259826 :         continue;
    2363        6158 :       unsigned PredReg = 0;
    2364        6158 :       if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
    2365           0 :         continue;
    2366             : 
    2367        6158 :       int Opc = MI.getOpcode();
    2368             :       bool isLd = isLoadSingle(Opc);
    2369        6158 :       unsigned Base = MI.getOperand(1).getReg();
    2370        6158 :       int Offset = getMemoryOpOffset(MI);
    2371             : 
    2372             :       bool StopHere = false;
    2373        6158 :       if (isLd) {
    2374             :         DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
    2375        3546 :           Base2LdsMap.find(Base);
    2376        3546 :         if (BI != Base2LdsMap.end()) {
    2377        2447 :           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
    2378        3836 :             if (Offset == getMemoryOpOffset(*BI->second[i])) {
    2379             :               StopHere = true;
    2380             :               break;
    2381             :             }
    2382             :           }
    2383         550 :           if (!StopHere)
    2384         529 :             BI->second.push_back(&MI);
    2385             :         } else {
    2386        2996 :           Base2LdsMap[Base].push_back(&MI);
    2387        2996 :           LdBases.push_back(Base);
    2388             :         }
    2389             :       } else {
    2390             :         DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
    2391        2612 :           Base2StsMap.find(Base);
    2392        2612 :         if (BI != Base2StsMap.end()) {
    2393       17985 :           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
    2394       33902 :             if (Offset == getMemoryOpOffset(*BI->second[i])) {
    2395             :               StopHere = true;
    2396             :               break;
    2397             :             }
    2398             :           }
    2399        1036 :           if (!StopHere)
    2400        1034 :             BI->second.push_back(&MI);
    2401             :         } else {
    2402        1576 :           Base2StsMap[Base].push_back(&MI);
    2403        1576 :           StBases.push_back(Base);
    2404             :         }
    2405             :       }
    2406             : 
    2407        1586 :       if (StopHere) {
    2408             :         // Found a duplicate (a base+offset combination that's seen earlier).
    2409             :         // Backtrack.
    2410          23 :         --Loc;
    2411          23 :         break;
    2412             :       }
    2413             :     }
    2414             : 
    2415             :     // Re-schedule loads.
    2416       28064 :     for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
    2417        5992 :       unsigned Base = LdBases[i];
    2418        2996 :       SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
    2419        2996 :       if (Lds.size() > 1)
    2420         263 :         RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    2421             :     }
    2422             : 
    2423             :     // Re-schedule stores.
    2424       26644 :     for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
    2425        3152 :       unsigned Base = StBases[i];
    2426        1576 :       SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
    2427        1576 :       if (Sts.size() > 1)
    2428         395 :         RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    2429             :     }
    2430             : 
    2431       25068 :     if (MBBI != E) {
    2432        7895 :       Base2LdsMap.clear();
    2433        7895 :       Base2StsMap.clear();
    2434             :       LdBases.clear();
    2435             :       StBases.clear();
    2436             :     }
    2437             :   }
    2438             : 
    2439       17436 :   return RetVal;
    2440             : }
    2441             : 
    2442             : /// Returns an instance of the load / store optimization pass.
    2443        4954 : FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
    2444        4954 :   if (PreAlloc)
    2445        4954 :     return new ARMPreAllocLoadStoreOpt();
    2446        2477 :   return new ARMLoadStoreOpt();
    2447      299229 : }

Generated by: LCOV version 1.13