LCOV - code coverage report
Current view: top level - lib/Target/ARM - ARMLoadStoreOptimizer.cpp (source / functions)
Test: llvm-toolchain.info | Lines: 669 hit / 926 total (72.2 % coverage)
Date: 2018-10-20 13:21:21 | Functions: 37 hit / 42 total (88.1 % coverage)
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file This file contains a pass that performs load / store related peephole
      11             : /// optimizations. This pass should be run after register allocation.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "ARM.h"
      16             : #include "ARMBaseInstrInfo.h"
      17             : #include "ARMBaseRegisterInfo.h"
      18             : #include "ARMISelLowering.h"
      19             : #include "ARMMachineFunctionInfo.h"
      20             : #include "ARMSubtarget.h"
      21             : #include "MCTargetDesc/ARMAddressingModes.h"
      22             : #include "MCTargetDesc/ARMBaseInfo.h"
      23             : #include "Utils/ARMBaseInfo.h"
      24             : #include "llvm/ADT/ArrayRef.h"
      25             : #include "llvm/ADT/DenseMap.h"
      26             : #include "llvm/ADT/DenseSet.h"
      27             : #include "llvm/ADT/STLExtras.h"
      28             : #include "llvm/ADT/SmallPtrSet.h"
      29             : #include "llvm/ADT/SmallSet.h"
      30             : #include "llvm/ADT/SmallVector.h"
      31             : #include "llvm/ADT/Statistic.h"
      32             : #include "llvm/ADT/iterator_range.h"
      33             : #include "llvm/Analysis/AliasAnalysis.h"
      34             : #include "llvm/CodeGen/LivePhysRegs.h"
      35             : #include "llvm/CodeGen/MachineBasicBlock.h"
      36             : #include "llvm/CodeGen/MachineFunction.h"
      37             : #include "llvm/CodeGen/MachineFunctionPass.h"
      38             : #include "llvm/CodeGen/MachineInstr.h"
      39             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      40             : #include "llvm/CodeGen/MachineMemOperand.h"
      41             : #include "llvm/CodeGen/MachineOperand.h"
      42             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      43             : #include "llvm/CodeGen/RegisterClassInfo.h"
      44             : #include "llvm/CodeGen/TargetFrameLowering.h"
      45             : #include "llvm/CodeGen/TargetInstrInfo.h"
      46             : #include "llvm/CodeGen/TargetLowering.h"
      47             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      48             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      49             : #include "llvm/IR/DataLayout.h"
      50             : #include "llvm/IR/DebugLoc.h"
      51             : #include "llvm/IR/DerivedTypes.h"
      52             : #include "llvm/IR/Function.h"
      53             : #include "llvm/IR/Type.h"
      54             : #include "llvm/MC/MCInstrDesc.h"
      55             : #include "llvm/Pass.h"
      56             : #include "llvm/Support/Allocator.h"
      57             : #include "llvm/Support/CommandLine.h"
      58             : #include "llvm/Support/Debug.h"
      59             : #include "llvm/Support/ErrorHandling.h"
      60             : #include "llvm/Support/raw_ostream.h"
      61             : #include <algorithm>
      62             : #include <cassert>
      63             : #include <cstddef>
      64             : #include <cstdlib>
      65             : #include <iterator>
      66             : #include <limits>
      67             : #include <utility>
      68             : 
      69             : using namespace llvm;
      70             : 
      71             : #define DEBUG_TYPE "arm-ldst-opt"
      72             : 
      73             : STATISTIC(NumLDMGened , "Number of ldm instructions generated");
      74             : STATISTIC(NumSTMGened , "Number of stm instructions generated");
      75             : STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
      76             : STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
      77             : STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
      78             : STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
      79             : STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
      80             : STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
      81             : STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
      82             : STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
      83             : STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
      84             : 
      85             : /// This switch disables formation of double/multi instructions that could
      86             : /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
      87             : /// disabled. This can be used to create libraries that are robust even when
      88             : /// users provoke undefined behaviour by supplying misaligned pointers.
      89             : /// \see mayCombineMisaligned()
      90             : static cl::opt<bool>
      91             : AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
      92             :     cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
      93             : 
      94             : #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
      95             : 
      96             : namespace {
      97             : 
      98             :   /// Post-register-allocation pass to combine load / store instructions to
      99             :   /// form ldm / stm instructions.
     100             :   struct ARMLoadStoreOpt : public MachineFunctionPass {
     101             :     static char ID;
     102             : 
     103             :     const MachineFunction *MF;
     104             :     const TargetInstrInfo *TII;
     105             :     const TargetRegisterInfo *TRI;
     106             :     const ARMSubtarget *STI;
     107             :     const TargetLowering *TL;
     108             :     ARMFunctionInfo *AFI;
     109             :     LivePhysRegs LiveRegs;
     110             :     RegisterClassInfo RegClassInfo;
     111             :     MachineBasicBlock::const_iterator LiveRegPos;
     112             :     bool LiveRegsValid;
     113             :     bool RegClassInfoValid;
     114             :     bool isThumb1, isThumb2;
     115             : 
     116        2573 :     ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
     117             : 
     118             :     bool runOnMachineFunction(MachineFunction &Fn) override;
     119             : 
     120        2562 :     MachineFunctionProperties getRequiredProperties() const override {
     121        2562 :       return MachineFunctionProperties().set(
     122        2562 :           MachineFunctionProperties::Property::NoVRegs);
     123             :     }
     124             : 
     125        2561 :     StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
     126             : 
     127             :   private:
     128             :     /// A set of load/store MachineInstrs with same base register sorted by
     129             :     /// offset.
     130             :     struct MemOpQueueEntry {
     131             :       MachineInstr *MI;
     132             :       int Offset;        ///< Load/Store offset.
     133             :       unsigned Position; ///< Position as counted from end of basic block.
     134             : 
     135             :       MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
     136       12328 :           : MI(&MI), Offset(Offset), Position(Position) {}
     137             :     };
     138             :     using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
     139             : 
     140             :     /// A set of MachineInstrs that fulfill (nearly all) conditions to get
     141             :     /// merged into a LDM/STM.
     142       10157 :     struct MergeCandidate {
     143             :       /// List of instructions ordered by load/store offset.
     144             :       SmallVector<MachineInstr*, 4> Instrs;
     145             : 
     146             :       /// Index in Instrs of the instruction being latest in the schedule.
     147             :       unsigned LatestMIIdx;
     148             : 
     149             :       /// Index in Instrs of the instruction being earliest in the schedule.
     150             :       unsigned EarliestMIIdx;
     151             : 
     152             :       /// Index into the basic block where the merged instruction will be
     153             :       /// inserted. (See MemOpQueueEntry.Position)
     154             :       unsigned InsertPos;
     155             : 
     156             :       /// Whether the instructions can be merged into a ldm/stm instruction.
     157             :       bool CanMergeToLSMulti;
     158             : 
     159             :       /// Whether the instructions can be merged into a ldrd/strd instruction.
     160             :       bool CanMergeToLSDouble;
     161             :     };
     162             :     SpecificBumpPtrAllocator<MergeCandidate> Allocator;
     163             :     SmallVector<const MergeCandidate*,4> Candidates;
     164             :     SmallVector<MachineInstr*,4> MergeBaseCandidates;
     165             : 
     166             :     void moveLiveRegsBefore(const MachineBasicBlock &MBB,
     167             :                             MachineBasicBlock::const_iterator Before);
     168             :     unsigned findFreeReg(const TargetRegisterClass &RegClass);
     169             :     void UpdateBaseRegUses(MachineBasicBlock &MBB,
     170             :                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
     171             :                            unsigned Base, unsigned WordOffset,
     172             :                            ARMCC::CondCodes Pred, unsigned PredReg);
     173             :     MachineInstr *CreateLoadStoreMulti(
     174             :         MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     175             :         int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
     176             :         ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
     177             :         ArrayRef<std::pair<unsigned, bool>> Regs);
     178             :     MachineInstr *CreateLoadStoreDouble(
     179             :         MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     180             :         int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
     181             :         ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
     182             :         ArrayRef<std::pair<unsigned, bool>> Regs) const;
     183             :     void FormCandidates(const MemOpQueue &MemOps);
     184             :     MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
     185             :     bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
     186             :                              MachineBasicBlock::iterator &MBBI);
     187             :     bool MergeBaseUpdateLoadStore(MachineInstr *MI);
     188             :     bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
     189             :     bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
     190             :     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
     191             :     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
     192             :     bool CombineMovBx(MachineBasicBlock &MBB);
     193             :   };
     194             : 
     195             : } // end anonymous namespace
     196             : 
     197             : char ARMLoadStoreOpt::ID = 0;
     198             : 
     199      199024 : INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
     200             :                 false)
     201             : 
     202          85 : static bool definesCPSR(const MachineInstr &MI) {
     203         589 :   for (const auto &MO : MI.operands()) {
     204         504 :     if (!MO.isReg())
     205             :       continue;
     206         334 :     if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
     207             :       // If the instruction has live CPSR def, then it's not safe to fold it
     208             :       // into load / store.
     209             :       return true;
     210             :   }
     211             : 
     212             :   return false;
     213             : }
     214             : 
     215       51645 : static int getMemoryOpOffset(const MachineInstr &MI) {
     216       51645 :   unsigned Opcode = MI.getOpcode();
     217       51645 :   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
     218       51645 :   unsigned NumOperands = MI.getDesc().getNumOperands();
     219       51645 :   unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
     220             : 
     221      103290 :   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
     222       51645 :       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
     223       44518 :       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
     224       44509 :       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
     225       40017 :     return OffField;
     226             : 
     227             :   // Thumb1 immediate offsets are scaled by 4
     228       11628 :   if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
     229        9012 :       Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
     230        4073 :     return OffField * 4;
     231             : 
     232        7555 :   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
     233        7540 :     : ARM_AM::getAM5Offset(OffField) * 4;
     234        7555 :   ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
     235             :     : ARM_AM::getAM5Op(OffField);
     236             : 
     237             :   if (Op == ARM_AM::sub)
     238          48 :     return -Offset;
     239             : 
     240             :   return Offset;
     241             : }
     242             : 
     243             : static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
     244        1039 :   return MI.getOperand(1);
     245             : }
     246             : 
     247             : static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
     248       16283 :   return MI.getOperand(0);
     249             : }
     250             : 
     251        3573 : static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
     252        3573 :   switch (Opcode) {
     253           0 :   default: llvm_unreachable("Unhandled opcode!");
     254             :   case ARM::LDRi12:
     255             :     ++NumLDMGened;
     256             :     switch (Mode) {
     257           0 :     default: llvm_unreachable("Unhandled submode!");
     258             :     case ARM_AM::ia: return ARM::LDMIA;
     259             :     case ARM_AM::da: return ARM::LDMDA;
     260             :     case ARM_AM::db: return ARM::LDMDB;
     261             :     case ARM_AM::ib: return ARM::LDMIB;
     262             :     }
     263             :   case ARM::STRi12:
     264             :     ++NumSTMGened;
     265             :     switch (Mode) {
     266           0 :     default: llvm_unreachable("Unhandled submode!");
     267             :     case ARM_AM::ia: return ARM::STMIA;
     268             :     case ARM_AM::da: return ARM::STMDA;
     269             :     case ARM_AM::db: return ARM::STMDB;
     270             :     case ARM_AM::ib: return ARM::STMIB;
     271             :     }
     272             :   case ARM::tLDRi:
     273             :   case ARM::tLDRspi:
     274             :     // tLDMIA is writeback-only - unless the base register is in the input
     275             :     // reglist.
     276             :     ++NumLDMGened;
     277         339 :     switch (Mode) {
     278           0 :     default: llvm_unreachable("Unhandled submode!");
     279             :     case ARM_AM::ia: return ARM::tLDMIA;
     280             :     }
     281             :   case ARM::tSTRi:
     282             :   case ARM::tSTRspi:
     283             :     // There is no non-writeback tSTMIA either.
     284             :     ++NumSTMGened;
     285         174 :     switch (Mode) {
     286           0 :     default: llvm_unreachable("Unhandled submode!");
     287             :     case ARM_AM::ia: return ARM::tSTMIA_UPD;
     288             :     }
     289             :   case ARM::t2LDRi8:
     290             :   case ARM::t2LDRi12:
     291             :     ++NumLDMGened;
     292         355 :     switch (Mode) {
     293           0 :     default: llvm_unreachable("Unhandled submode!");
     294             :     case ARM_AM::ia: return ARM::t2LDMIA;
     295           0 :     case ARM_AM::db: return ARM::t2LDMDB;
     296             :     }
     297             :   case ARM::t2STRi8:
     298             :   case ARM::t2STRi12:
     299             :     ++NumSTMGened;
     300         411 :     switch (Mode) {
     301           0 :     default: llvm_unreachable("Unhandled submode!");
     302             :     case ARM_AM::ia: return ARM::t2STMIA;
     303           0 :     case ARM_AM::db: return ARM::t2STMDB;
     304             :     }
     305             :   case ARM::VLDRS:
     306             :     ++NumVLDMGened;
     307          41 :     switch (Mode) {
     308           0 :     default: llvm_unreachable("Unhandled submode!");
     309             :     case ARM_AM::ia: return ARM::VLDMSIA;
     310           0 :     case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
     311             :     }
     312             :   case ARM::VSTRS:
     313             :     ++NumVSTMGened;
     314          24 :     switch (Mode) {
     315           0 :     default: llvm_unreachable("Unhandled submode!");
     316             :     case ARM_AM::ia: return ARM::VSTMSIA;
     317           0 :     case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
     318             :     }
     319             :   case ARM::VLDRD:
     320             :     ++NumVLDMGened;
     321         211 :     switch (Mode) {
     322           0 :     default: llvm_unreachable("Unhandled submode!");
     323             :     case ARM_AM::ia: return ARM::VLDMDIA;
     324           0 :     case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
     325             :     }
     326             :   case ARM::VSTRD:
     327             :     ++NumVSTMGened;
     328         100 :     switch (Mode) {
     329           0 :     default: llvm_unreachable("Unhandled submode!");
     330             :     case ARM_AM::ia: return ARM::VSTMDIA;
     331           0 :     case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
     332             :     }
     333             :   }
     334             : }
     335             : 
     336         265 : static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
     337         265 :   switch (Opcode) {
     338           0 :   default: llvm_unreachable("Unhandled opcode!");
     339             :   case ARM::LDMIA_RET:
     340             :   case ARM::LDMIA:
     341             :   case ARM::LDMIA_UPD:
     342             :   case ARM::STMIA:
     343             :   case ARM::STMIA_UPD:
     344             :   case ARM::tLDMIA:
     345             :   case ARM::tLDMIA_UPD:
     346             :   case ARM::tSTMIA_UPD:
     347             :   case ARM::t2LDMIA_RET:
     348             :   case ARM::t2LDMIA:
     349             :   case ARM::t2LDMIA_UPD:
     350             :   case ARM::t2STMIA:
     351             :   case ARM::t2STMIA_UPD:
     352             :   case ARM::VLDMSIA:
     353             :   case ARM::VLDMSIA_UPD:
     354             :   case ARM::VSTMSIA:
     355             :   case ARM::VSTMSIA_UPD:
     356             :   case ARM::VLDMDIA:
     357             :   case ARM::VLDMDIA_UPD:
     358             :   case ARM::VSTMDIA:
     359             :   case ARM::VSTMDIA_UPD:
     360             :     return ARM_AM::ia;
     361             : 
     362           0 :   case ARM::LDMDA:
     363             :   case ARM::LDMDA_UPD:
     364             :   case ARM::STMDA:
     365             :   case ARM::STMDA_UPD:
     366           0 :     return ARM_AM::da;
     367             : 
     368           0 :   case ARM::LDMDB:
     369             :   case ARM::LDMDB_UPD:
     370             :   case ARM::STMDB:
     371             :   case ARM::STMDB_UPD:
     372             :   case ARM::t2LDMDB:
     373             :   case ARM::t2LDMDB_UPD:
     374             :   case ARM::t2STMDB:
     375             :   case ARM::t2STMDB_UPD:
     376             :   case ARM::VLDMSDB_UPD:
     377             :   case ARM::VSTMSDB_UPD:
     378             :   case ARM::VLDMDDB_UPD:
     379             :   case ARM::VSTMDDB_UPD:
     380           0 :     return ARM_AM::db;
     381             : 
     382          52 :   case ARM::LDMIB:
     383             :   case ARM::LDMIB_UPD:
     384             :   case ARM::STMIB:
     385             :   case ARM::STMIB_UPD:
     386          52 :     return ARM_AM::ib;
     387             :   }
     388             : }
     389             : 
     390             : static bool isT1i32Load(unsigned Opc) {
     391       17694 :   return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
     392             : }
     393             : 
     394             : static bool isT2i32Load(unsigned Opc) {
     395       15579 :   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
     396             : }
     397             : 
     398             : static bool isi32Load(unsigned Opc) {
     399       22663 :   return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
     400             : }
     401             : 
     402             : static bool isT1i32Store(unsigned Opc) {
     403        3498 :   return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
     404             : }
     405             : 
     406             : static bool isT2i32Store(unsigned Opc) {
     407        3019 :   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
     408             : }
     409             : 
     410             : static bool isi32Store(unsigned Opc) {
     411        5730 :   return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
     412             : }
     413             : 
     414             : static bool isLoadSingle(unsigned Opc) {
     415        6645 :   return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
     416             : }
     417             : 
     418             : static unsigned getImmScale(unsigned Opc) {
     419             :   switch (Opc) {
     420           0 :   default: llvm_unreachable("Unhandled opcode!");
     421             :   case ARM::tLDRi:
     422             :   case ARM::tSTRi:
     423             :   case ARM::tLDRspi:
     424             :   case ARM::tSTRspi:
     425             :     return 1;
     426             :   case ARM::tLDRHi:
     427             :   case ARM::tSTRHi:
     428             :     return 2;
     429             :   case ARM::tLDRBi:
     430             :   case ARM::tSTRBi:
     431             :     return 4;
     432             :   }
     433             : }
     434             : 
     435       15021 : static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
     436       30042 :   switch (MI->getOpcode()) {
     437             :   default: return 0;
     438       11106 :   case ARM::LDRi12:
     439             :   case ARM::STRi12:
     440             :   case ARM::tLDRi:
     441             :   case ARM::tSTRi:
     442             :   case ARM::tLDRspi:
     443             :   case ARM::tSTRspi:
     444             :   case ARM::t2LDRi8:
     445             :   case ARM::t2LDRi12:
     446             :   case ARM::t2STRi8:
     447             :   case ARM::t2STRi12:
     448             :   case ARM::VLDRS:
     449             :   case ARM::VSTRS:
     450       11106 :     return 4;
     451        3650 :   case ARM::VLDRD:
     452             :   case ARM::VSTRD:
     453        3650 :     return 8;
     454         198 :   case ARM::LDMIA:
     455             :   case ARM::LDMDA:
     456             :   case ARM::LDMDB:
     457             :   case ARM::LDMIB:
     458             :   case ARM::STMIA:
     459             :   case ARM::STMDA:
     460             :   case ARM::STMDB:
     461             :   case ARM::STMIB:
     462             :   case ARM::tLDMIA:
     463             :   case ARM::tLDMIA_UPD:
     464             :   case ARM::tSTMIA_UPD:
     465             :   case ARM::t2LDMIA:
     466             :   case ARM::t2LDMDB:
     467             :   case ARM::t2STMIA:
     468             :   case ARM::t2STMDB:
     469             :   case ARM::VLDMSIA:
     470             :   case ARM::VSTMSIA:
     471         396 :     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
     472          67 :   case ARM::VLDMDIA:
     473             :   case ARM::VSTMDIA:
     474         134 :     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
     475             :   }
     476             : }
     477             : 
     478             : /// Update future uses of the base register with the offset introduced
     479             : /// due to writeback. This function only works on Thumb1.
     480           0 : void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
     481             :                                         MachineBasicBlock::iterator MBBI,
     482             :                                         const DebugLoc &DL, unsigned Base,
     483             :                                         unsigned WordOffset,
     484             :                                         ARMCC::CondCodes Pred,
     485             :                                         unsigned PredReg) {
     486             :   assert(isThumb1 && "Can only update base register uses for Thumb1!");
     487             :   // Start updating any instructions with immediate offsets. Insert a SUB before
     488             :   // the first non-updateable instruction (if any).
     489           0 :   for (; MBBI != MBB.end(); ++MBBI) {
     490             :     bool InsertSub = false;
     491           0 :     unsigned Opc = MBBI->getOpcode();
     492             : 
     493           0 :     if (MBBI->readsRegister(Base)) {
     494             :       int Offset;
     495             :       bool IsLoad =
     496           0 :         Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
     497             :       bool IsStore =
     498           0 :         Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
     499             : 
     500           0 :       if (IsLoad || IsStore) {
     501             :         // Loads and stores with immediate offsets can be updated, but only if
     502             :         // the new offset isn't negative.
     503             :         // The MachineOperand containing the offset immediate is the last one
     504             :         // before predicates.
     505             :         MachineOperand &MO =
     506           0 :           MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
     507             :         // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
     508           0 :         Offset = MO.getImm() - WordOffset * getImmScale(Opc);
     509             : 
     510             :         // If storing the base register, it needs to be reset first.
     511           0 :         unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
     512             : 
     513           0 :         if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
     514           0 :           MO.setImm(Offset);
     515             :         else
     516             :           InsertSub = true;
     517           0 :       } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
     518           0 :                  !definesCPSR(*MBBI)) {
     519             :         // SUBS/ADDS using this register, with a dead def of the CPSR.
     520             :         // Merge it with the update; if the merged offset is too large,
     521             :         // insert a new sub instead.
     522             :         MachineOperand &MO =
     523           0 :           MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
     524           0 :         Offset = (Opc == ARM::tSUBi8) ?
     525           0 :           MO.getImm() + WordOffset * 4 :
     526           0 :           MO.getImm() - WordOffset * 4 ;
     527           0 :         if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
     528             :           // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
     529             :           // Offset == 0.
     530             :           MO.setImm(Offset);
     531             :           // The base register has now been reset, so exit early.
     532           0 :           return;
     533             :         } else {
     534             :           InsertSub = true;
     535             :         }
     536             :       } else {
     537             :         // Can't update the instruction.
     538             :         InsertSub = true;
     539             :       }
     540           0 :     } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
     541             :       // Since SUBS sets the condition flags, we can't place the base reset
     542             :       // after an instruction that has a live CPSR def.
     543             :       // The base register might also contain an argument for a function call.
     544             :       InsertSub = true;
     545             :     }
     546             : 
     547             :     if (InsertSub) {
     548             :       // An instruction above couldn't be updated, so insert a sub.
     549           0 :       BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
     550           0 :           .add(t1CondCodeOp(true))
     551           0 :           .addReg(Base)
     552           0 :           .addImm(WordOffset * 4)
     553           0 :           .addImm(Pred)
     554           0 :           .addReg(PredReg);
     555           0 :       return;
     556             :     }
     557             : 
     558           0 :     if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
     559             :       // Register got killed. Stop updating.
     560           0 :       return;
     561             :   }
     562             : 
     563             :   // End of block was reached.
     564           0 :   if (MBB.succ_size() > 0) {
     565             :     // FIXME: Because of a bug, live registers are sometimes missing from
     566             :     // the successor blocks' live-in sets. This means we can't trust that
     567             :     // information and *always* have to reset at the end of a block.
     568             :     // See PR21029.
     569             :     if (MBBI != MBB.end()) --MBBI;
     570           0 :     BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
     571           0 :         .add(t1CondCodeOp(true))
     572           0 :         .addReg(Base)
     573           0 :         .addImm(WordOffset * 4)
     574           0 :         .addImm(Pred)
     575           0 :         .addReg(PredReg);
     576             :   }
     577             : }
     578             : 
     579             : /// Return the first register of class \p RegClass that is not in LiveRegs, or 0 if there is none.
     580          66 : unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
     581          66 :   if (!RegClassInfoValid) { // RegClassInfo is computed on demand.
     582          64 :     RegClassInfo.runOnMachineFunction(*MF);
     583          64 :     RegClassInfoValid = true;
     584             :   }
     585             : 
     586         318 :   for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
     587         311 :     if (!LiveRegs.contains(Reg))
     588          59 :       return Reg;
     589             :   return 0; // Every register in the order is contained in LiveRegs.
     590             : }
     591             : 
     592             : /// Compute live registers just before instruction \p Before (in normal schedule
     593             : /// direction). Computes backwards so multiple queries in the same block must
     594             : /// come in reverse order.
     595          66 : void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
     596             :     MachineBasicBlock::const_iterator Before) {
     597             :   // Initialize from the block's live-outs if we never queried in this block.
     598          66 :   if (!LiveRegsValid) {
     599          64 :     LiveRegs.init(*TRI);
     600          64 :     LiveRegs.addLiveOuts(MBB);
     601          64 :     LiveRegPos = MBB.end();
     602          64 :     LiveRegsValid = true;
     603             :   }
     604             :   // Step LiveRegs backward, one instruction at a time, until LiveRegPos is "Before".
     605         938 :   while (LiveRegPos != Before) {
     606             :     --LiveRegPos;
     607        1744 :     LiveRegs.stepBackward(*LiveRegPos);
     608             :   }
     609          66 : }
     610             : /// Return true if \p Reg is the first element of any pair in \p Regs.
     611             : static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
     612             :                         unsigned Reg) {
     613         839 :   for (const std::pair<unsigned, bool> &R : Regs)
     614         660 :     if (R.first == Reg)
     615             :       return true;
     616             :   return false;
     617             : }
     618             : 
     619             : /// Create and insert a LDM or STM with Base as base register and registers in
     620             : /// Regs as the register operands that would be loaded / stored. Returns the
     621             : /// merged instruction, or nullptr if the merge was not performed.
     622         839 : MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
     623             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     624             :     int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
     625             :     ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
     626             :     ArrayRef<std::pair<unsigned, bool>> Regs) {
     627         839 :   unsigned NumRegs = Regs.size();
     628             :   assert(NumRegs > 1);
     629             : 
     630             :   // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
     631             :   // Compute liveness information for that register to make the decision.
     632        1023 :   bool SafeToClobberCPSR = !isThumb1 ||
     633         184 :     (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     634             :      MachineBasicBlock::LQR_Dead);
     635             : 
     636         839 :   bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
     637             : 
     638             :   // Exception: If the base register is in the input reglist, Thumb1 LDM is
     639             :   // non-writeback.
     640             :   // It's also not possible to merge an STR of the base register in Thumb1.
     641        1023 :   if (isThumb1 && ContainsReg(Regs, Base)) {
     642             :     assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
     643           6 :     if (Opcode == ARM::tLDRi)
     644             :       Writeback = false;
     645           2 :     else if (Opcode == ARM::tSTRi)
     646             :       return nullptr;
     647             :   }
     648             : 
     649             :   ARM_AM::AMSubMode Mode = ARM_AM::ia;
     650             :   // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
     651             :   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
     652         750 :   bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
     653             : 
     654         837 :   if (Offset == 4 && haveIBAndDA) {
     655             :     Mode = ARM_AM::ib;
     656         783 :   } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
     657             :     Mode = ARM_AM::da;
     658         783 :   } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
     659             :     // VLDM/VSTM do not support DB mode without also updating the base reg.
     660             :     Mode = ARM_AM::db;
     661         783 :   } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
     662             :     // Check if this is a supported opcode before inserting instructions to
     663             :     // calculate a new base register.
     664         593 :     if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
     665             : 
     666             :     // If starting offset isn't zero, insert a MI to materialize a new base.
     667             :     // But only do so if it is cost effective, i.e. merging more than two
     668             :     // loads / stores.
     669         593 :     if (NumRegs <= 2)
     670             :       return nullptr;
     671             : 
     672             :     // On Thumb1, it's not worth materializing a new base register without
     673             :     // clobbering the CPSR (i.e. not using ADDS/SUBS).
     674         439 :     if (!SafeToClobberCPSR)
     675             :       return nullptr;
     676             : 
     677             :     unsigned NewBase;
     678             :     if (isi32Load(Opcode)) {
     679             :       // If it is a load, then just use one of the destination registers
     680             :       // as the new base. Will no longer be writeback in Thumb1.
     681         746 :       NewBase = Regs[NumRegs-1].first;
     682             :       Writeback = false;
     683             :     } else {
     684             :       // Find a free register that we can use as scratch register.
     685          66 :       moveLiveRegsBefore(MBB, InsertBefore);
     686             :       // The merged instruction does not exist yet but will use several Regs if
     687             :       // it is a Store.
     688             :       if (!isLoadSingle(Opcode))
     689         289 :         for (const std::pair<unsigned, bool> &R : Regs)
     690         229 :           LiveRegs.addReg(R.first);
     691             : 
     692         111 :       NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
     693          66 :       if (NewBase == 0)
     694             :         return nullptr;
     695             :     }
     696             : 
     697             :     int BaseOpc =
     698         645 :       isThumb2 ? ARM::t2ADDri :
     699         340 :       (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
     700          12 :       (isThumb1 && Offset < 8) ? ARM::tADDi3 :
     701             :       isThumb1 ? ARM::tADDi8  : ARM::ADDri;
     702             : 
     703         432 :     if (Offset < 0) {
     704           0 :       Offset = - Offset;
     705             :       BaseOpc =
     706           0 :         isThumb2 ? ARM::t2SUBri :
     707           0 :         (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
     708             :         isThumb1 ? ARM::tSUBi8  : ARM::SUBri;
     709             :     }
     710             : 
     711         432 :     if (!TL->isLegalAddImmediate(Offset))
     712             :       // FIXME: Try add with register operand?
     713             :       return nullptr; // Probably not worth it then.
     714             : 
     715             :     // We can only append a kill flag to the add/sub input if the value is not
     716             :     // used in the register list of the stm as well.
     717         369 :     bool KillOldBase = BaseKill &&
     718           3 :       (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
     719             : 
     720         369 :     if (isThumb1) {
     721             :       // Thumb1: depending on immediate size, use either
     722             :       //   ADDS NewBase, Base, #imm3
     723             :       // or
     724             :       //   MOV  NewBase, Base
     725             :       //   ADDS NewBase, #imm8.
     726          88 :       if (Base != NewBase &&
     727          85 :           (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
     728             :         // Need to insert a MOV to the new base first.
     729           7 :         if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
     730           7 :             !STI->hasV6Ops()) {
     731             :           // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
     732           4 :           if (Pred != ARMCC::AL)
     733             :             return nullptr;
     734          12 :           BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
     735           4 :             .addReg(Base, getKillRegState(KillOldBase));
     736             :         } else
     737           6 :           BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
     738           3 :               .addReg(Base, getKillRegState(KillOldBase))
     739           3 :               .add(predOps(Pred, PredReg));
     740             : 
     741             :         // The following ADDS/SUBS becomes an update.
     742             :         Base = NewBase;
     743             :         KillOldBase = true;
     744             :       }
     745          88 :       if (BaseOpc == ARM::tADDrSPi) {
     746             :         assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
     747         152 :         BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
     748          76 :             .addReg(Base, getKillRegState(KillOldBase))
     749          76 :             .addImm(Offset / 4)
     750          76 :             .add(predOps(Pred, PredReg));
     751             :       } else
     752          24 :         BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
     753          12 :             .add(t1CondCodeOp(true))
     754          12 :             .addReg(Base, getKillRegState(KillOldBase))
     755             :             .addImm(Offset)
     756          12 :             .add(predOps(Pred, PredReg));
     757             :     } else {
     758         562 :       BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
     759         281 :           .addReg(Base, getKillRegState(KillOldBase))
     760             :           .addImm(Offset)
     761         281 :           .add(predOps(Pred, PredReg))
     762         281 :           .add(condCodeOp());
     763             :     }
     764             :     Base = NewBase;
     765             :     BaseKill = true; // New base is always killed straight away.
     766             :   }
     767             : 
     768             :   bool isDef = isLoadSingle(Opcode);
     769             : 
     770             :   // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
     771             :   // base register writeback.
     772         613 :   Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
     773         613 :   if (!Opcode)
     774             :     return nullptr;
     775             : 
     776             :   // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
     777             :   // - There is no writeback (LDM of base register),
     778             :   // - the base register is killed by the merged instruction,
     779             :   // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
     780             :   //   to reset the base register.
     781             :   // Otherwise, don't merge.
     782             :   // It's safe to return here since the code to materialize a new base register
     783             :   // above is also conditional on SafeToClobberCPSR.
     784         613 :   if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
     785             :     return nullptr;
     786             : 
     787         613 :   MachineInstrBuilder MIB;
     788             : 
     789         613 :   if (Writeback) {
     790             :     assert(isThumb1 && "expected Writeback only inThumb1");
     791          32 :     if (Opcode == ARM::tLDMIA) {
     792             :       assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
     793             :       // Update tLDMIA with writeback if necessary.
     794             :       Opcode = ARM::tLDMIA_UPD;
     795             :     }
     796             : 
     797          64 :     MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
     798             : 
     799             :     // Thumb1: we might need to set base writeback when building the MI.
     800          32 :     MIB.addReg(Base, getDefRegState(true))
     801          32 :        .addReg(Base, getKillRegState(BaseKill));
     802             : 
     803             :     // The base isn't dead after a merged instruction with writeback.
     804             :     // Insert a sub instruction after the newly formed instruction to reset.
     805          32 :     if (!BaseKill)
     806           8 :       UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
     807             :   } else {
     808             :     // No writeback, simply build the MachineInstr.
     809        1162 :     MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
     810         581 :     MIB.addReg(Base, getKillRegState(BaseKill));
     811             :   }
     812             : 
     813         613 :   MIB.addImm(Pred).addReg(PredReg);
     814             : 
     815        2881 :   for (const std::pair<unsigned, bool> &R : Regs)
     816        4085 :     MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
     817             : 
     818         613 :   return MIB.getInstr();
     819             : }
     820             : /// Create and insert a t2LDRDi8 / t2STRDi8 for the two registers in \p Regs and return the new instruction.
     821         157 : MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
     822             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     823             :     int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
     824             :     ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
     825             :     ArrayRef<std::pair<unsigned, bool>> Regs) const {
     826             :   bool IsLoad = isi32Load(Opcode);
     827             :   assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
     828             :   unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
     829             : 
     830             :   assert(Regs.size() == 2);
     831             :   MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
     832         314 :                                     TII->get(LoadStoreOpcode));
     833         157 :   if (IsLoad) { // Loads define both registers.
     834          64 :     MIB.addReg(Regs[0].first, RegState::Define)
     835          64 :        .addReg(Regs[1].first, RegState::Define);
     836             :   } else { // Stores use both registers and carry over their kill flags.
     837         137 :     MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
     838         126 :        .addReg(Regs[1].first, getKillRegState(Regs[1].second));
     839             :   }
     840         157 :   MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); // Note: BaseKill is not applied to Base here.
     841         157 :   return MIB.getInstr();
     842             : }
     843             : 
     844             : /// Merge the instructions in \p Cand; on success erase them, fix up operand flags and return the new instruction (nullptr otherwise).
     845         996 : MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
     846         996 :   const MachineInstr *First = Cand.Instrs.front();
     847         996 :   unsigned Opcode = First->getOpcode();
     848             :   bool IsLoad = isLoadSingle(Opcode);
     849             :   SmallVector<std::pair<unsigned, bool>, 8> Regs;
     850             :   SmallVector<unsigned, 4> ImpDefs;
     851             :   DenseSet<unsigned> KilledRegs;
     852             :   DenseSet<unsigned> UsedRegs;
     853             :   // Determine list of registers and list of implicit super-register defs.
     854        4163 :   for (const MachineInstr *MI : Cand.Instrs) {
     855             :     const MachineOperand &MO = getLoadStoreRegOp(*MI);
     856        3167 :     unsigned Reg = MO.getReg();
     857             :     bool IsKill = MO.isKill();
     858        3167 :     if (IsKill)
     859             :       KilledRegs.insert(Reg);
     860        6334 :     Regs.push_back(std::make_pair(Reg, IsKill));
     861             :     UsedRegs.insert(Reg);
     862             : 
     863        3167 :     if (IsLoad) {
     864             :       // Collect any implicit defs of super-registers, after merging we can't
     865             :       // be sure anymore that we properly preserved these live ranges and must
     866             :       // remove these implicit operands.
     867        2287 :       for (const MachineOperand &MO : MI->implicit_operands()) {
     868         110 :         if (!MO.isReg() || !MO.isDef() || MO.isDead())
     869          82 :           continue;
     870             :         assert(MO.isImplicit());
     871          69 :         unsigned DefReg = MO.getReg();
     872             : 
     873          69 :         if (is_contained(ImpDefs, DefReg))
     874             :           continue;
     875             :         // We can ignore cases where the super-reg is read and written.
     876          56 :         if (MI->readsRegister(DefReg))
     877             :           continue;
     878          28 :         ImpDefs.push_back(DefReg);
     879             :       }
     880             :     }
     881             :   }
     882             : 
     883             :   // Attempt the merge.
     884             :   using iterator = MachineBasicBlock::iterator;
     885             : 
     886        1992 :   MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
     887         996 :   iterator InsertBefore = std::next(iterator(LatestMI));
     888         996 :   MachineBasicBlock &MBB = *LatestMI->getParent();
     889         996 :   unsigned Offset = getMemoryOpOffset(*First); // NOTE(review): unsigned here, but passed as the int Offset parameter of the Create* calls below; confirm negative offsets round-trip.
     890         996 :   unsigned Base = getLoadStoreBaseOp(*First).getReg();
     891             :   bool BaseKill = LatestMI->killsRegister(Base);
     892         996 :   unsigned PredReg = 0;
     893         996 :   ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
     894             :   DebugLoc DL = First->getDebugLoc();
     895             :   MachineInstr *Merged = nullptr;
     896         996 :   if (Cand.CanMergeToLSDouble)
     897         157 :     Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
     898             :                                    Opcode, Pred, PredReg, DL, Regs);
     899         996 :   if (!Merged && Cand.CanMergeToLSMulti)
     900         839 :     Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
     901             :                                   Opcode, Pred, PredReg, DL, Regs);
     902         996 :   if (!Merged)
     903             :     return nullptr;
     904             : 
     905             :   // Determine earliest instruction that will get removed. We then keep an
     906             :   // iterator just above it so the following erases don't invalidate it.
     907        1540 :   iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
     908             :   bool EarliestAtBegin = false;
     909         770 :   if (EarliestI == MBB.begin()) {
     910             :     EarliestAtBegin = true;
     911             :   } else {
     912         712 :     EarliestI = std::prev(EarliestI);
     913             :   }
     914             : 
     915             :   // Remove instructions which have been merged.
     916        3352 :   for (MachineInstr *MI : Cand.Instrs)
     917             :     MBB.erase(MI);
     918             : 
     919             :   // Determine range between the earliest removed instruction and the new one.
     920         770 :   if (EarliestAtBegin)
     921             :     EarliestI = MBB.begin();
     922             :   else
     923         712 :     EarliestI = std::next(EarliestI);
     924             :   auto FixupRange = make_range(EarliestI, iterator(Merged));
     925             : 
     926             :   if (isLoadSingle(Opcode)) {
     927             :     // If the previous loads defined a super-reg, then we have to mark earlier
     928             :     // operands undef; Replicate the super-reg def on the merged instruction.
     929         823 :     for (MachineInstr &MI : FixupRange) {
     930         331 :       for (unsigned &ImpDefReg : ImpDefs) {
     931          11 :         for (MachineOperand &MO : MI.implicit_operands()) {
     932           2 :           if (!MO.isReg() || MO.getReg() != ImpDefReg)
     933             :             continue;
     934             :           if (MO.readsReg())
     935             :             MO.setIsUndef();
     936           1 :           else if (MO.isDef())
     937           1 :             ImpDefReg = 0;
     938             :         }
     939             :       }
     940             :     }
     941             : 
     942         501 :     MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
     943         528 :     for (unsigned ImpDef : ImpDefs)
     944          27 :       MIB.addReg(ImpDef, RegState::ImplicitDefine);
     945             :   } else {
     946             :     // Remove kill flags: We are possibly storing the values later now.
     947             :     assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
     948         335 :     for (MachineInstr &MI : FixupRange) {
     949         368 :       for (MachineOperand &MO : MI.uses()) {
     950         302 :         if (!MO.isReg() || !MO.isKill())
     951             :           continue;
     952          10 :         if (UsedRegs.count(MO.getReg()))
     953             :           MO.setIsKill(false);
     954             :       }
     955             :     }
     956             :     assert(ImpDefs.empty());
     957             :   }
     958             : 
     959             :   return Merged;
     960             : }
     961             : /// Return true if \p Offset is a multiple of 4 whose magnitude is below 1024.
     962             : static bool isValidLSDoubleOffset(int Offset) {
     963        2000 :   unsigned Value = abs(Offset);
     964             :   // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
     965             :   // multiplied by 4.
     966        2000 :   return (Value % 4) == 0 && Value < 1024;
     967             : }
     968             : 
     969             : /// Return true for loads/stores that can be combined to a double/multi
     970             : /// operation without increasing the requirements for alignment.
     971          46 : static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
     972             :                                  const MachineInstr &MI) {
     973             :   // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
     974             :   // difference.
     975          46 :   unsigned Opcode = MI.getOpcode();
     976             :   if (!isi32Load(Opcode) && !isi32Store(Opcode))
     977             :     return true;
     978             : 
     979             :   // Stack pointer alignment is out of the programmer's control so we can trust
     980             :   // SP-relative loads/stores.
     981          43 :   if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
     982           4 :       STI.getFrameLowering()->getTransientStackAlignment() >= 4)
     983           4 :     return true;
     984             :   return false; // Any other integer load/store is conservatively rejected.
     985             : }
     986             : 
     987             : /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
     988        9228 : void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
     989        9228 :   const MachineInstr *FirstMI = MemOps[0].MI;
     990        9228 :   unsigned Opcode = FirstMI->getOpcode();
     991             :   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
     992        9228 :   unsigned Size = getLSMultipleTransferSize(FirstMI);
     993             : 
     994             :   unsigned SIndex = 0;
     995        9228 :   unsigned EIndex = MemOps.size();
     996             :   do {
     997             :     // Look at the first instruction.
     998       10157 :     const MachineInstr *MI = MemOps[SIndex].MI;
     999       10157 :     int Offset = MemOps[SIndex].Offset;
    1000             :     const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    1001       10157 :     unsigned PReg = PMO.getReg();
    1002       10157 :     unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
    1003       10157 :                                      : TRI->getEncodingValue(PReg);
    1004             :     unsigned Latest = SIndex;
    1005             :     unsigned Earliest = SIndex;
    1006             :     unsigned Count = 1;
    1007             :     bool CanMergeToLSDouble =
    1008       12593 :       STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    1009             :     // ARM errata 602117: LDRD with base in list may result in incorrect base
    1010             :     // register when interrupted or faulted.
    1011       10157 :     if (STI->isCortexM3() && isi32Load(Opcode) &&
    1012          37 :         PReg == getLoadStoreBaseOp(*MI).getReg())
    1013             :       CanMergeToLSDouble = false;
    1014             : 
    1015             :     bool CanMergeToLSMulti = true;
    1016             :     // On Swift, avoid vldm/vstm starting with an odd register number, as that
    1017             :     // needs more uops than single vldrs.
    1018       10157 :     if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
    1019             :       CanMergeToLSMulti = false;
    1020             : 
    1021             :     // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    1022             :     // deprecated; LDM to PC is fine but cannot happen here.
    1023       10157 :     if (PReg == ARM::SP || PReg == ARM::PC)
    1024             :       CanMergeToLSMulti = CanMergeToLSDouble = false;
    1025             : 
    1026             :     // Should we be conservative?
    1027       10157 :     if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
    1028             :       CanMergeToLSMulti = CanMergeToLSDouble = false;
    1029             : 
    1030             :     // vldm / vstm limits are 32 for S variants, 16 for D variants; only the D limit is applied here.
    1031             :     unsigned Limit;
    1032       10157 :     switch (Opcode) {
    1033             :     default:
    1034             :       Limit = UINT_MAX;
    1035             :       break;
    1036        1996 :     case ARM::VLDRD:
    1037             :     case ARM::VSTRD:
    1038             :       Limit = 16;
    1039        1996 :       break;
    1040             :     }
    1041             : 
    1042             :     // Merge following instructions where possible.
    1043       12328 :     for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
    1044        3100 :       int NewOffset = MemOps[I].Offset;
    1045        3100 :       if (NewOffset != Offset + (int)Size)
    1046             :         break;
    1047        2959 :       const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
    1048        2959 :       unsigned Reg = MO.getReg();
    1049        2959 :       if (Reg == ARM::SP || Reg == ARM::PC)
    1050             :         break;
    1051        2959 :       if (Count == Limit)
    1052             :         break;
    1053             : 
    1054             :       // See if the current load/store may be part of a multi load/store.
    1055        2958 :       unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
    1056        2958 :                                      : TRI->getEncodingValue(Reg);
    1057             :       bool PartOfLSMulti = CanMergeToLSMulti;
    1058        2958 :       if (PartOfLSMulti) {
    1059             :         // Register numbers must be in ascending order.
    1060        2903 :         if (RegNum <= PRegNum)
    1061             :           PartOfLSMulti = false;
    1062             :         // For VFP / NEON load/store multiples, the registers must be
    1063             :         // consecutive and within the limit on the number of registers per
    1064             :         // instruction.
    1065        2156 :         else if (!isNotVFP && RegNum != PRegNum+1)
    1066             :           PartOfLSMulti = false;
    1067             :       }
    1068             :       // See if the current load/store may be part of a double load/store.
    1069        2958 :       bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
    1070             : 
    1071        2958 :       if (!PartOfLSMulti && !PartOfLSDouble)
    1072             :         break;
    1073             :       CanMergeToLSMulti &= PartOfLSMulti;
    1074             :       CanMergeToLSDouble &= PartOfLSDouble;
    1075             :       // Track MemOp with latest and earliest position (Positions are
    1076             :       // counted in reverse).
    1077        2171 :       unsigned Position = MemOps[I].Position;
    1078        4342 :       if (Position < MemOps[Latest].Position)
    1079             :         Latest = I;
    1080        1312 :       else if (Position > MemOps[Earliest].Position)
    1081             :         Earliest = I;
    1082             :       // Prepare for next MemOp.
    1083        2171 :       Offset += Size;
    1084             :       PRegNum = RegNum;
    1085             :     }
    1086             : 
    1087             :     // Form a candidate from the Ops collected so far.
    1088             :     MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    1089       22485 :     for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
    1090       24656 :       Candidate->Instrs.push_back(MemOps[C].MI);
    1091       10157 :     Candidate->LatestMIIdx = Latest - SIndex;
    1092       10157 :     Candidate->EarliestMIIdx = Earliest - SIndex;
    1093       10157 :     Candidate->InsertPos = MemOps[Latest].Position;
    1094       10157 :     if (Count == 1) // A single instruction is still recorded, but flagged as not mergeable.
    1095             :       CanMergeToLSMulti = CanMergeToLSDouble = false;
    1096       10157 :     Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    1097       10157 :     Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    1098       10157 :     Candidates.push_back(Candidate);
    1099             :     // Continue after the chain.
    1100             :     SIndex += Count;
    1101       10157 :   } while (SIndex < EIndex);
    1102        9228 : }
    1103             : /// Map load/store-multiple opcode \p Opc plus submode \p Mode to its base-updating (_UPD) opcode.
    1104          12 : static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
    1105             :                                             ARM_AM::AMSubMode Mode) {
    1106          12 :   switch (Opc) {
    1107           0 :   default: llvm_unreachable("Unhandled opcode!");
    1108           2 :   case ARM::LDMIA:
    1109             :   case ARM::LDMDA:
    1110             :   case ARM::LDMDB:
    1111             :   case ARM::LDMIB: // Any ARM-mode LDM: the result depends only on Mode.
    1112             :     switch (Mode) {
    1113           0 :     default: llvm_unreachable("Unhandled submode!");
    1114             :     case ARM_AM::ia: return ARM::LDMIA_UPD;
    1115             :     case ARM_AM::ib: return ARM::LDMIB_UPD;
    1116             :     case ARM_AM::da: return ARM::LDMDA_UPD;
    1117             :     case ARM_AM::db: return ARM::LDMDB_UPD;
    1118             :     }
    1119           1 :   case ARM::STMIA:
    1120             :   case ARM::STMDA:
    1121             :   case ARM::STMDB:
    1122             :   case ARM::STMIB: // Any ARM-mode STM: the result depends only on Mode.
    1123             :     switch (Mode) {
    1124           0 :     default: llvm_unreachable("Unhandled submode!");
    1125             :     case ARM_AM::ia: return ARM::STMIA_UPD;
    1126             :     case ARM_AM::ib: return ARM::STMIB_UPD;
    1127             :     case ARM_AM::da: return ARM::STMDA_UPD;
    1128             :     case ARM_AM::db: return ARM::STMDB_UPD;
    1129             :     }
    1130           2 :   case ARM::t2LDMIA:
    1131             :   case ARM::t2LDMDB: // Thumb2: only the ia and db submodes are handled.
    1132           2 :     switch (Mode) {
    1133           0 :     default: llvm_unreachable("Unhandled submode!");
    1134             :     case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    1135           0 :     case ARM_AM::db: return ARM::t2LDMDB_UPD;
    1136             :     }
    1137           4 :   case ARM::t2STMIA:
    1138             :   case ARM::t2STMDB:
    1139           4 :     switch (Mode) {
    1140           0 :     default: llvm_unreachable("Unhandled submode!");
    1141             :     case ARM_AM::ia: return ARM::t2STMIA_UPD;
    1142           0 :     case ARM_AM::db: return ARM::t2STMDB_UPD;
    1143             :     }
    1144           0 :   case ARM::VLDMSIA: // VFP multiples: only the ia and db submodes are handled.
    1145           0 :     switch (Mode) {
    1146           0 :     default: llvm_unreachable("Unhandled submode!");
    1147             :     case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    1148           0 :     case ARM_AM::db: return ARM::VLDMSDB_UPD;
    1149             :     }
    1150           0 :   case ARM::VLDMDIA:
    1151           0 :     switch (Mode) {
    1152           0 :     default: llvm_unreachable("Unhandled submode!");
    1153             :     case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    1154           0 :     case ARM_AM::db: return ARM::VLDMDDB_UPD;
    1155             :     }
    1156           0 :   case ARM::VSTMSIA:
    1157           0 :     switch (Mode) {
    1158           0 :     default: llvm_unreachable("Unhandled submode!");
    1159             :     case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    1160           0 :     case ARM_AM::db: return ARM::VSTMSDB_UPD;
    1161             :     }
    1162           3 :   case ARM::VSTMDIA:
    1163           3 :     switch (Mode) {
    1164           0 :     default: llvm_unreachable("Unhandled submode!");
    1165             :     case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    1166           0 :     case ARM_AM::db: return ARM::VSTMDDB_UPD;
    1167             :     }
    1168             :   }
    1169             : }
    1170             : 
    1171             : /// Check if \p MI is one of the handled immediate ADD/SUB opcodes whose operands 0
    1172             : /// and 1 are both \p Reg and whose predicate equals \p Pred / \p PredReg, and return the
    1173             : /// signed amount (immediate times a per-opcode scale). Returns 0 otherwise, including when a flag-checked opcode defines CPSR.
    1174        6032 : static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
    1175             :                                   ARMCC::CondCodes Pred, unsigned PredReg) {
    1176             :   bool CheckCPSRDef;
    1177             :   int Scale;
    1178       12064 :   switch (MI.getOpcode()) {
    1179             :   case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
    1180           0 :   case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
    1181          38 :   case ARM::t2SUBri:
    1182          38 :   case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
    1183         271 :   case ARM::t2ADDri:
    1184         271 :   case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
    1185          11 :   case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
    1186          14 :   case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
    1187             :   default: return 0;
    1188             :   }
    1189             :   // Operands 0 and 1 must both be Reg, and the predicate must match exactly.
    1190             :   unsigned MIPredReg;
    1191         499 :   if (MI.getOperand(0).getReg() != Reg ||
    1192         278 :       MI.getOperand(1).getReg() != Reg ||
    1193         447 :       getInstrPredicate(MI, MIPredReg) != Pred ||
    1194          93 :       MIPredReg != PredReg)
    1195         241 :     return 0;
    1196             : 
    1197          93 :   if (CheckCPSRDef && definesCPSR(MI))
    1198             :     return 0;
    1199          93 :   return MI.getOperand(2).getImm() * Scale;
    1200             : }
    1201             : 
    1202             : /// Searches for an increment or decrement of \p Reg before \p MBBI; returns it with its amount in \p Offset, or the block's end() with \p Offset == 0.
    1203             : static MachineBasicBlock::iterator
    1204        3578 : findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
    1205             :                  ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
    1206        3578 :   Offset = 0;
    1207        3578 :   MachineBasicBlock &MBB = *MBBI->getParent();
    1208             :   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
    1209        3578 :   MachineBasicBlock::iterator EndMBBI = MBB.end();
    1210        3578 :   if (MBBI == BeginMBBI)
    1211        1073 :     return EndMBBI;
    1212             : 
    1213             :   // Skip debug instructions; only the single instruction reached is examined.
    1214        2505 :   MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    1215           5 :   while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
    1216             :     --PrevMBBI;
    1217             : 
    1218        2505 :   Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
    1219        2522 :   return Offset == 0 ? EndMBBI : PrevMBBI;
    1220             : }
    1221             : 
    1222             : /// Searches for an increment or decrement of \p Reg after \p MBBI; returns it with its amount in \p Offset, or the block's end() with \p Offset == 0.
    1223             : static MachineBasicBlock::iterator
    1224        3567 : findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
    1225             :                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
    1226        3567 :   Offset = 0;
    1227        3567 :   MachineBasicBlock &MBB = *MBBI->getParent();
    1228        3567 :   MachineBasicBlock::iterator EndMBBI = MBB.end();
    1229        3567 :   MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    1230             :   // Skip debug instructions; only the first non-debug instruction is examined.
    1231        3571 :   while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
    1232             :     ++NextMBBI;
    1233        3567 :   if (NextMBBI == EndMBBI)
    1234          40 :     return EndMBBI;
    1235             : 
    1236        3527 :   Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
    1237        3603 :   return Offset == 0 ? EndMBBI : NextMBBI;
    1238             : }
    1239             : 
    1240             : /// Fold preceding/trailing inc/dec of base register into the
    1241             : /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible (returns true if \p MI was replaced):
    1242             : ///
    1243             : /// stmia rn, <ra, rb, rc>
    1244             : /// rn := rn + 4 * 3;
    1245             : /// =>
    1246             : /// stmia rn!, <ra, rb, rc>
    1247             : ///
    1248             : /// rn := rn - 4 * 3;
    1249             : /// ldmia rn, <ra, rb, rc>
    1250             : /// =>
    1251             : /// ldmdb rn!, <ra, rb, rc>
    1252           0 : bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
    1253             :   // Thumb1 is already using updating loads/stores.
    1254           0 :   if (isThumb1) return false;
    1255             : 
    1256           0 :   const MachineOperand &BaseOP = MI->getOperand(0);
    1257           0 :   unsigned Base = BaseOP.getReg();
    1258             :   bool BaseKill = BaseOP.isKill();
    1259           0 :   unsigned PredReg = 0;
    1260           0 :   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
    1261           0 :   unsigned Opcode = MI->getOpcode();
    1262             :   DebugLoc DL = MI->getDebugLoc();
    1263             : 
    1264             :   // Can't use an updating ld/st if the base register is also a dest
    1265             :   // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    1266           0 :   for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    1267           0 :     if (MI->getOperand(i).getReg() == Base)
    1268           0 :       return false;
    1269             : 
    1270           0 :   int Bytes = getLSMultipleTransferSize(MI);
    1271           0 :   MachineBasicBlock &MBB = *MI->getParent();
    1272             :   MachineBasicBlock::iterator MBBI(MI);
    1273             :   int Offset;
    1274             :   MachineBasicBlock::iterator MergeInstr
    1275           0 :     = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
    1276           0 :   ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
    1277           0 :   if (Mode == ARM_AM::ia && Offset == -Bytes) {
    1278             :     Mode = ARM_AM::db;
    1279           0 :   } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    1280             :     Mode = ARM_AM::da;
    1281             :   } else {
    1282           0 :     MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    1283           0 :     if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
    1284           0 :         ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
    1285             : 
    1286             :       // We couldn't find an inc/dec to merge. But if the base is dead, we
    1287             :       // can still change to a writeback form as that will save us 2 bytes
    1288             :       // of code size. It can create WAW hazards though, so only do it if
    1289             :       // we're minimizing code size.
    1290           0 :       if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
    1291           0 :         return false;
    1292             : 
    1293             :       bool HighRegsUsed = false;
    1294           0 :       for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    1295           0 :         if (MI->getOperand(i).getReg() >= ARM::R8) {
    1296             :           HighRegsUsed = true;
    1297             :           break;
    1298             :         }
    1299             : 
    1300           0 :       if (!HighRegsUsed)
    1301             :         MergeInstr = MBB.end();
    1302             :       else
    1303           0 :         return false;
    1304             :     }
    1305             :   }
    1306           0 :   if (MergeInstr != MBB.end()) // end() means writeback is used with no inc/dec to remove.
    1307           0 :     MBB.erase(MergeInstr);
    1308             : 
    1309           0 :   unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
    1310           0 :   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    1311           0 :     .addReg(Base, getDefRegState(true)) // WB base register
    1312           0 :     .addReg(Base, getKillRegState(BaseKill))
    1313           0 :     .addImm(Pred).addReg(PredReg);
    1314             : 
    1315             :   // Transfer the rest of operands.
    1316           0 :   for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    1317           0 :     MIB.add(MI->getOperand(OpNum));
    1318             : 
    1319             :   // Transfer memoperands.
    1320           0 :   MIB.setMemRefs(MI->memoperands());
    1321             : 
    1322           0 :   MBB.erase(MBBI);
    1323           0 :   return true;
    1324             : }
    1325             : /// Returns the pre-indexed opcode for single load/store \p Opc; \p Mode only picks IA vs. DB for the VLDR/VSTR cases, which map to updating multiples.
    1326             : static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
    1327             :                                              ARM_AM::AddrOpc Mode) {
    1328           0 :   switch (Opc) {
    1329             :   case ARM::LDRi12:
    1330             :     return ARM::LDR_PRE_IMM;
    1331           0 :   case ARM::STRi12:
    1332             :     return ARM::STR_PRE_IMM;
    1333           0 :   case ARM::VLDRS:
    1334             :     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    1335           0 :   case ARM::VLDRD:
    1336             :     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    1337           0 :   case ARM::VSTRS:
    1338             :     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    1339           0 :   case ARM::VSTRD:
    1340             :     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    1341           0 :   case ARM::t2LDRi8:
    1342             :   case ARM::t2LDRi12:
    1343             :     return ARM::t2LDR_PRE;
    1344           0 :   case ARM::t2STRi8:
    1345             :   case ARM::t2STRi12:
    1346             :     return ARM::t2STR_PRE;
    1347           0 :   default: llvm_unreachable("Unhandled opcode!");
    1348             :   }
    1349             : }
    1350             : /// Returns the post-indexed opcode for single load/store \p Opc; \p Mode only picks IA vs. DB for the VLDR/VSTR cases, which map to updating multiples.
    1351             : static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
    1352             :                                               ARM_AM::AddrOpc Mode) {
    1353           0 :   switch (Opc) {
    1354             :   case ARM::LDRi12:
    1355             :     return ARM::LDR_POST_IMM;
    1356           0 :   case ARM::STRi12:
    1357             :     return ARM::STR_POST_IMM;
    1358           0 :   case ARM::VLDRS:
    1359             :     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    1360           0 :   case ARM::VLDRD:
    1361             :     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    1362           0 :   case ARM::VSTRS:
    1363             :     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    1364           0 :   case ARM::VSTRD:
    1365             :     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    1366           0 :   case ARM::t2LDRi8:
    1367             :   case ARM::t2LDRi12:
    1368             :     return ARM::t2LDR_POST;
    1369           0 :   case ARM::t2STRi8:
    1370             :   case ARM::t2STRi12:
    1371             :     return ARM::t2STR_POST;
    1372           0 :   default: llvm_unreachable("Unhandled opcode!");
    1373             :   }
    1374             : }
    1375             : 
    1376             : /// Fold preceding/trailing inc/dec of base register into the
    1377             : /// LDR/STR/FLD{D|S}/FST{D|S} op when possible. Returns true if \p MI was replaced.
    1378           0 : bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
    1379             :   // Thumb1 doesn't have updating LDR/STR.
    1380             :   // FIXME: Use LDM/STM with single register instead.
    1381           0 :   if (isThumb1) return false;
    1382             : 
    1383           0 :   unsigned Base = getLoadStoreBaseOp(*MI).getReg();
    1384             :   bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
    1385           0 :   unsigned Opcode = MI->getOpcode();
    1386             :   DebugLoc DL = MI->getDebugLoc();
    1387           0 :   bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
    1388           0 :                 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
    1389           0 :   bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
    1390             :   if (isi32Load(Opcode) || isi32Store(Opcode))
    1391           0 :     if (MI->getOperand(2).getImm() != 0)
    1392           0 :       return false;
    1393           0 :   if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    1394           0 :     return false;
    1395             : 
    1396             :   // Can't do the merge if the destination register is the same as the would-be
    1397             :   // writeback register.
    1398           0 :   if (MI->getOperand(0).getReg() == Base)
    1399           0 :     return false;
    1400             : 
    1401           0 :   unsigned PredReg = 0;
    1402           0 :   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
    1403           0 :   int Bytes = getLSMultipleTransferSize(MI);
    1404           0 :   MachineBasicBlock &MBB = *MI->getParent();
    1405             :   MachineBasicBlock::iterator MBBI(MI);
    1406             :   int Offset;
    1407             :   MachineBasicBlock::iterator MergeInstr
    1408           0 :     = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
    1409             :   unsigned NewOpc;
    1410           0 :   if (!isAM5 && Offset == Bytes) {
    1411             :     NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    1412           0 :   } else if (Offset == -Bytes) {
    1413             :     NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    1414             :   } else {
    1415           0 :     MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    1416           0 :     if (Offset == Bytes) {
    1417             :       NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    1418           0 :     } else if (!isAM5 && Offset == -Bytes) {
    1419             :       NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    1420             :     } else
    1421           0 :       return false;
    1422             :   }
    1423           0 :   MBB.erase(MergeInstr);
    1424             : 
    1425           0 :   ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
    1426             : 
    1427             :   bool isLd = isLoadSingle(Opcode);
    1428           0 :   if (isAM5) {
    1429             :     // VLDM[SD]_UPD, VSTM[SD]_UPD
    1430             :     // (There are no base-updating versions of VLDR/VSTR instructions, but the
    1431             :     // updating load/store-multiple instructions can be used with only one
    1432             :     // register.)
    1433           0 :     MachineOperand &MO = MI->getOperand(0);
    1434           0 :     BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    1435           0 :       .addReg(Base, getDefRegState(true)) // WB base register
    1436           0 :       .addReg(Base, getKillRegState(isLd ? BaseKill : false))
    1437           0 :       .addImm(Pred).addReg(PredReg)
    1438             :       .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
    1439           0 :                             getKillRegState(MO.isKill())));
    1440           0 :   } else if (isLd) {
    1441           0 :     if (isAM2) {
    1442             :       // LDR_PRE, LDR_POST. NOTE(review): the opcode helpers visible in this part of the file never return LDRB_PRE_IMM, so that half of the test below looks unreachable - confirm.
    1443           0 :       if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
    1444           0 :         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
    1445           0 :           .addReg(Base, RegState::Define)
    1446           0 :           .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    1447             :       } else {
    1448           0 :         int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
    1449           0 :         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
    1450           0 :             .addReg(Base, RegState::Define)
    1451           0 :             .addReg(Base)
    1452           0 :             .addReg(0)
    1453           0 :             .addImm(Imm)
    1454           0 :             .add(predOps(Pred, PredReg));
    1455             :       }
    1456             :     } else {
    1457             :       // t2LDR_PRE, t2LDR_POST
    1458           0 :       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
    1459           0 :           .addReg(Base, RegState::Define)
    1460           0 :           .addReg(Base)
    1461           0 :           .addImm(Offset)
    1462           0 :           .add(predOps(Pred, PredReg));
    1463             :     }
    1464             :   } else {
    1465           0 :     MachineOperand &MO = MI->getOperand(0);
    1466             :     // FIXME: post-indexed stores use am2offset_imm, which still encodes
    1467             :     // the vestigial zero-reg offset register. When that's fixed, this clause
    1468             :     // can be removed entirely.
    1469           0 :     if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
    1470           0 :       int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
    1471             :       // STR_POST_IMM only (see the condition above).
    1472           0 :       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
    1473           0 :           .addReg(MO.getReg(), getKillRegState(MO.isKill()))
    1474           0 :           .addReg(Base)
    1475           0 :           .addReg(0)
    1476           0 :           .addImm(Imm)
    1477           0 :           .add(predOps(Pred, PredReg));
    1478             :     } else {
    1479             :       // t2STR_PRE, t2STR_POST; the ARM-mode STR_PRE_IMM case also lands here.
    1480           0 :       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
    1481           0 :           .addReg(MO.getReg(), getKillRegState(MO.isKill()))
    1482           0 :           .addReg(Base)
    1483           0 :           .addImm(Offset)
    1484           0 :           .add(predOps(Pred, PredReg));
    1485             :     }
    1486             :   }
    1487           0 :   MBB.erase(MBBI);
    1488             : 
    1489           0 :   return true;
    1490             : }
    1491             : /// Fold an adjacent +8/-8 inc/dec of the base register into a zero-offset t2LDRDi8/t2STRDi8, producing the _PRE/_POST form. Returns true if \p MI was replaced.
    1492           0 : bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
    1493           0 :   unsigned Opcode = MI.getOpcode();
    1494             :   assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
    1495             :          "Must have t2STRDi8 or t2LDRDi8");
    1496           0 :   if (MI.getOperand(3).getImm() != 0)
    1497           0 :     return false;
    1498             : 
    1499             :   // Behaviour for writeback is undefined if base register is the same as one
    1500             :   // of the others.
    1501             :   const MachineOperand &BaseOp = MI.getOperand(2);
    1502           0 :   unsigned Base = BaseOp.getReg();
    1503             :   const MachineOperand &Reg0Op = MI.getOperand(0);
    1504             :   const MachineOperand &Reg1Op = MI.getOperand(1);
    1505           0 :   if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    1506           0 :     return false;
    1507             : 
    1508             :   unsigned PredReg;
    1509           0 :   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
    1510             :   MachineBasicBlock::iterator MBBI(MI);
    1511           0 :   MachineBasicBlock &MBB = *MI.getParent();
    1512             :   int Offset;
    1513             :   MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
    1514           0 :                                                             PredReg, Offset);
    1515             :   unsigned NewOpc;
    1516           0 :   if (Offset == 8 || Offset == -8) {
    1517           0 :     NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
    1518             :   } else {
    1519           0 :     MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    1520           0 :     if (Offset == 8 || Offset == -8) {
    1521           0 :       NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    1522             :     } else
    1523           0 :       return false;
    1524             :   }
    1525           0 :   MBB.erase(MergeInstr);
    1526             : 
    1527             :   DebugLoc DL = MI.getDebugLoc();
    1528           0 :   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
    1529           0 :   if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    1530           0 :     MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
    1531             :   } else {
    1532             :     assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    1533           0 :     MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
    1534             :   }
    1535           0 :   MIB.addReg(BaseOp.getReg(), RegState::Kill)
    1536           0 :      .addImm(Offset).addImm(Pred).addReg(PredReg);
    1537             :   assert(TII->get(Opcode).getNumOperands() == 6 &&
    1538             :          TII->get(NewOpc).getNumOperands() == 7 &&
    1539             :          "Unexpected number of operands in Opcode specification.");
    1540             : 
    1541             :   // Transfer implicit operands.
    1542           0 :   for (const MachineOperand &MO : MI.implicit_operands())
    1543             :     MIB.add(MO);
    1544           0 :   MIB.setMemRefs(MI.memoperands());
    1545             : 
    1546           0 :   MBB.erase(MBBI);
    1547             :   return true;
    1548             : }
    1549             : 
    1550             : /// Returns true if instruction is a memory operation that this pass is capable
    1551             : /// of operating on: a listed single load/store opcode with a register operand 1, exactly one non-volatile memory operand aligned to at least 4, and no undef operand 0/1.
    1552      277487 : static bool isMemoryOp(const MachineInstr &MI) {
    1553      277487 :   unsigned Opcode = MI.getOpcode();
    1554      277487 :   switch (Opcode) {
    1555             :   case ARM::VLDRS:
    1556             :   case ARM::VSTRS:
    1557             :   case ARM::VLDRD:
    1558             :   case ARM::VSTRD:
    1559             :   case ARM::LDRi12:
    1560             :   case ARM::STRi12:
    1561             :   case ARM::tLDRi:
    1562             :   case ARM::tSTRi:
    1563             :   case ARM::tLDRspi:
    1564             :   case ARM::tSTRspi:
    1565             :   case ARM::t2LDRi8:
    1566             :   case ARM::t2LDRi12:
    1567             :   case ARM::t2STRi8:
    1568             :   case ARM::t2STRi12:
    1569             :     break;
    1570             :   default:
    1571             :     return false;
    1572             :   }
    1573       62854 :   if (!MI.getOperand(1).isReg())
    1574             :     return false;
    1575             : 
    1576             :   // When no memory operands are present, conservatively assume unaligned,
    1577             :   // volatile, unfoldable. (Instructions with more than one are rejected too.)
    1578       24790 :   if (!MI.hasOneMemOperand())
    1579             :     return false;
    1580             : 
    1581       23601 :   const MachineMemOperand &MMO = **MI.memoperands_begin();
    1582             : 
    1583             :   // Don't touch volatile memory accesses - we may be changing their order.
    1584       47202 :   if (MMO.isVolatile())
    1585             :     return false;
    1586             : 
    1587             :   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    1588             :   // not.
    1589       21758 :   if (MMO.getAlignment() < 4)
    1590             :     return false;
    1591             : 
    1592             :   // str <undef> could probably be eliminated entirely, but for now we just want
    1593             :   // to avoid making a mess of it.
    1594             :   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
    1595       42988 :   if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
    1596             :     return false;
    1597             : 
    1598             :   // Likewise don't mess with references to undefined addresses.
    1599       21494 :   if (MI.getOperand(1).isUndef())
    1600          94 :     return false;
    1601             : 
    1602             :   return true;
    1603             : }
    1604             : /// Insert before \p MBBI a \p NewOpc load (\p isDef) or store of \p Reg with base \p BaseReg, immediate \p Offset and predicate \p Pred / \p PredReg.
    1605          10 : static void InsertLDR_STR(MachineBasicBlock &MBB,
    1606             :                           MachineBasicBlock::iterator &MBBI, int Offset,
    1607             :                           bool isDef, unsigned NewOpc, unsigned Reg,
    1608             :                           bool RegDeadKill, bool RegUndef, unsigned BaseReg,
    1609             :                           bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
    1610             :                           unsigned PredReg, const TargetInstrInfo *TII) {
    1611          10 :   if (isDef) { // Load: Reg is a def; RegDeadKill becomes its dead flag and RegUndef is unused.
    1612           2 :     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
    1613           4 :                                       TII->get(NewOpc))
    1614           2 :       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
    1615           2 :       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    1616           2 :     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
    1617             :   } else { // Store: Reg is a use; RegDeadKill becomes its kill flag.
    1618           8 :     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
    1619          16 :                                       TII->get(NewOpc))
    1620           8 :       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
    1621           8 :       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    1622           8 :     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
    1623             :   }
    1624          10 : }
    1625             : /// Replace an LDRD/STRD at \p MBBI that fails the checks below with an LDM/STM or two single loads/stores.
    1626      134489 : bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
    1627             :                                           MachineBasicBlock::iterator &MBBI) {
    1628             :   MachineInstr *MI = &*MBBI;
    1629      134489 :   unsigned Opcode = MI->getOpcode();
    1630             :   // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
    1631             :   // if we see this opcode. Neither fix-up condition below can hold for it anyway.
    1632      134489 :   if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    1633             :     return false;
    1634             :   // Operands read here: 0 and 1 are the two transferred registers, 2 is the base register.
    1635         154 :   const MachineOperand &BaseOp = MI->getOperand(2);
    1636         154 :   unsigned BaseReg = BaseOp.getReg();
    1637         154 :   unsigned EvenReg = MI->getOperand(0).getReg();
    1638         154 :   unsigned OddReg  = MI->getOperand(1).getReg();
    1639         154 :   unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    1640         154 :   unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    1641             :   // The DWARF numbers computed above are what the parity and ordering tests below compare.
    1642             :   // ARM errata 602117: LDRD with base in list may result in incorrect base
    1643             :   // register when interrupted or faulted.
    1644          43 :   bool Errata602117 = EvenReg == BaseReg &&
    1645         154 :     (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
    1646             :   // ARM LDRD/STRD needs consecutive registers: an even first register followed by the next one.
    1647         154 :   bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    1648         103 :     (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
    1649             :   // Neither problem applies: leave the instruction (and MBBI) alone.
    1650         154 :   if (!Errata602117 && !NonConsecutiveRegs)
    1651             :     return false;
    1652             :   // Capture MI's operand flags for the replacement; "DeadKill" is the dead flag for loads, the kill flag for stores.
    1653          17 :   bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    1654          17 :   bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    1655          17 :   bool EvenDeadKill = isLd ?
    1656          17 :     MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    1657          17 :   bool EvenUndef = MI->getOperand(0).isUndef();
    1658          17 :   bool OddDeadKill  = isLd ?
    1659             :     MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    1660             :   bool OddUndef = MI->getOperand(1).isUndef();
    1661             :   bool BaseKill = BaseOp.isKill();
    1662             :   bool BaseUndef = BaseOp.isUndef();
    1663             :   assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
    1664             :          "register offset not handled below");
    1665          17 :   int OffImm = getMemoryOpOffset(*MI);
    1666          17 :   unsigned PredReg = 0;
    1667          17 :   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
    1668             : 
    1669          17 :   if (OddRegNum > EvenRegNum && OffImm == 0) {
    1670             :     // Ascending register numbers and no offset. It's safe to change it to a
    1671             :     // ldm or stm. NOTE(review): BaseUndef is not forwarded to the base operand on this path.
    1672             :     unsigned NewOpc = (isLd)
    1673          12 :       ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
    1674             :       : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    1675          12 :     if (isLd) {
    1676           6 :       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
    1677           2 :         .addReg(BaseReg, getKillRegState(BaseKill))
    1678           2 :         .addImm(Pred).addReg(PredReg)
    1679           4 :         .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
    1680           2 :         .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
    1681             :       ++NumLDRD2LDM;
    1682             :     } else {
    1683          30 :       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
    1684          10 :         .addReg(BaseReg, getKillRegState(BaseKill))
    1685          10 :         .addImm(Pred).addReg(PredReg)
    1686             :         .addReg(EvenReg,
    1687          20 :                 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
    1688             :         .addReg(OddReg,
    1689          10 :                 getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
    1690             :       ++NumSTRD2STM;
    1691             :     }
    1692             :   } else {
    1693             :     // Split into two instructions, one at OffImm and one at OffImm + 4.
    1694             :     unsigned NewOpc = (isLd)
    1695           5 :       ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
    1696             :       : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    1697             :     // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    1698             :     // so adjust and use t2LDRi12 here for that.
    1699             :     unsigned NewOpc2 = (isLd)
    1700           5 :       ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
    1701             :       : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    1702             :     // If this is a load, make sure the first load does not clobber the base
    1703             :     // register before the second load reads it.
    1704           5 :     if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
    1705             :       assert(!TRI->regsOverlap(OddReg, BaseReg));
    1706           1 :       InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
    1707             :                     false, BaseReg, false, BaseUndef, Pred, PredReg, TII);
    1708           1 :       InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
    1709             :                     false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
    1710             :     } else {
    1711           4 :       if (OddReg == EvenReg && EvenDeadKill) {
    1712             :         // If the two source operands are the same, the kill marker is
    1713             :         // probably on the first one. e.g.
    1714             :         // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
    1715             :         EvenDeadKill = false;
    1716             :         OddDeadKill = true;
    1717             :       }
    1718             :       // Never kill the base register in the first instruction.
    1719           4 :       if (EvenReg == BaseReg)
    1720             :         EvenDeadKill = false;
    1721           4 :       InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
    1722             :                     EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII);
    1723           4 :       InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
    1724             :                     OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
    1725             :     }
    1726             :     if (isLd)
    1727             :       ++NumLDRD2LDR;
    1728             :     else
    1729             :       ++NumSTRD2STR;
    1730             :   }
    1731             :   // Drop the original instruction; MBBI takes the iterator returned by erase().
    1732          17 :   MBBI = MBB.erase(MBBI);
    1733          17 :   return true;
    1734             : }
    1735             : 
    1736             : /// An optimization pass to turn multiple LDR / STR ops of the same base and
    1737             : /// incrementing offset into LDM / STM ops. Walks \p MBB from its last instruction to its first.
    1738       18397 : bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
    1739       18397 :   MemOpQueue MemOps;
    1740             :   unsigned CurrBase = 0;
    1741             :   unsigned CurrOpc = ~0u;
    1742             :   ARMCC::CondCodes CurrPred = ARMCC::AL;
    1743             :   unsigned Position = 0;
    1744             :   assert(Candidates.size() == 0);
    1745             :   assert(MergeBaseCandidates.size() == 0);
    1746       18397 :   LiveRegsValid = false;
    1747             : 
    1748      152886 :   for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
    1749      134489 :        I = MBBI) {
    1750             :     // The instruction in front of the iterator is the one we look at.
    1751      134489 :     MBBI = std::prev(I);
    1752      134489 :     if (FixInvalidRegPairOp(MBB, MBBI))
    1753             :       continue;
    1754      134472 :     ++Position;
    1755             :     // Position counts the instructions visited; ones rewritten by FixInvalidRegPairOp above are not counted.
    1756      134472 :     if (isMemoryOp(*MBBI)) {
    1757       15086 :       unsigned Opcode = MBBI->getOpcode();
    1758       15086 :       const MachineOperand &MO = MBBI->getOperand(0);
    1759       15086 :       unsigned Reg = MO.getReg();
    1760       15086 :       unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
    1761       15086 :       unsigned PredReg = 0;
    1762       15086 :       ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
    1763       15086 :       int Offset = getMemoryOpOffset(*MBBI);
    1764       15086 :       if (CurrBase == 0) {
    1765             :         // Start of a new chain.
    1766             :         CurrBase = Base;
    1767             :         CurrOpc  = Opcode;
    1768             :         CurrPred = Pred;
    1769        9228 :         MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
    1770       12328 :         continue;
    1771             :       }
    1772             :       // Note: No need to match PredReg in the next if.
    1773        5858 :       if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
    1774             :         // Watch out for:
    1775             :         //   r4 := ldr [r0, #8]
    1776             :         //   r4 := ldr [r0, #4]
    1777             :         // or
    1778             :         //   r0 := ldr [r0]
    1779             :         // If a load overrides the base register or a register loaded by
    1780             :         // another load in our chain, we cannot take this instruction.
    1781             :         bool Overlap = false;
    1782             :         if (isLoadSingle(Opcode)) {
    1783        2075 :           Overlap = (Base == Reg);
    1784        2075 :           if (!Overlap) {
    1785        6322 :             for (const MemOpQueueEntry &E : MemOps) {
    1786        4338 :               if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
    1787             :                 Overlap = true;
    1788             :                 break;
    1789             :               }
    1790             :             }
    1791             :           }
    1792             :         }
    1793             : 
    1794        2075 :         if (!Overlap) {
    1795             :           // Check offset and sort memory operation into the current chain (kept in ascending offset order).
    1796        3101 :           if (Offset > MemOps.back().Offset) {
    1797        1028 :             MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
    1798        1028 :             continue;
    1799             :           } else {
    1800             :             MemOpQueue::iterator MI, ME;
    1801        2328 :             for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
    1802        2328 :               if (Offset < MI->Offset) {
    1803             :                 // Found a place to insert.
    1804             :                 break;
    1805             :               }
    1806         256 :               if (Offset == MI->Offset) {
    1807             :                 // Collision (this offset is already queued), abort.
    1808             :                 MI = ME;
    1809             :                 break;
    1810             :               }
    1811             :             }
    1812        2073 :             if (MI != MemOps.end()) {
    1813        2072 :               MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
    1814        2072 :               continue;
    1815             :             }
    1816             :           }
    1817             :         }
    1818             :       }
    1819             : 
    1820             :       // Don't advance the iterator; The op will start a new chain next.
    1821        2758 :       MBBI = I;
    1822             :       --Position;
    1823             :       // Fallthrough to look into existing chain.
    1824             :     } else if (MBBI->isDebugInstr()) {
    1825             :       continue;
    1826      119256 :     } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
    1827             :                MBBI->getOpcode() == ARM::t2STRDi8) {
    1828             :       // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
    1829             :       // remember them because we may still be able to merge add/sub into them.
    1830         129 :       MergeBaseCandidates.push_back(&*MBBI);
    1831             :     }
    1832             : 
    1833             :     // If we are here then the chain is broken; Extract candidates for a merge.
    1834      122014 :     if (MemOps.size() > 0) {
    1835        7793 :       FormCandidates(MemOps);
    1836             :       // Reset for the next chain.
    1837             :       CurrBase = 0;
    1838             :       CurrOpc = ~0u;
    1839             :       CurrPred = ARMCC::AL;
    1840             :       MemOps.clear();
    1841             :     }
    1842             :   }
    1843       18397 :   if (MemOps.size() > 0)
    1844        1435 :     FormCandidates(MemOps);
    1845             : 
    1846             :   // Sort candidates so they get processed from end to begin of the basic
    1847             :   // block later; This is necessary for liveness calculation.
    1848             :   auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    1849           0 :     return M0->InsertPos < M1->InsertPos;
    1850             :   };
    1851       18397 :   llvm::sort(Candidates, LessThan);
    1852             : 
    1853             :   // Go through list of candidates and merge.
    1854             :   bool Changed = false;
    1855       28554 :   for (const MergeCandidate *Candidate : Candidates) {
    1856       10157 :     if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
    1857         996 :       MachineInstr *Merged = MergeOpsUpdate(*Candidate);
    1858             :       // Merge preceding/trailing base inc/dec into the merged op.
    1859         996 :       if (Merged) {
    1860             :         Changed = true;
    1861         770 :         unsigned Opcode = Merged->getOpcode();
    1862         770 :         if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
    1863         157 :           MergeBaseUpdateLSDouble(*Merged);
    1864             :         else
    1865         613 :           MergeBaseUpdateLSMultiple(Merged);
    1866             :       } else {
    1867         811 :         for (MachineInstr *MI : Candidate->Instrs) {
    1868         585 :           if (MergeBaseUpdateLoadStore(MI))
    1869             :             Changed = true;
    1870             :         }
    1871             :       }
    1872             :     } else {
    1873             :       assert(Candidate->Instrs.size() == 1);
    1874        9161 :       if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
    1875             :         Changed = true;
    1876             :     }
    1877             :   }
    1878             :   Candidates.clear();
    1879             :   // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
    1880       18526 :   for (MachineInstr *MI : MergeBaseCandidates)
    1881         129 :     MergeBaseUpdateLSDouble(*MI);
    1882             :   MergeBaseCandidates.clear();
    1883             :   // NOTE(review): Changed is not set when FixInvalidRegPairOp rewrote an instruction, and the result (if any) of the MergeBaseUpdateLSDouble loop above is not folded in either.
    1884       18397 :   return Changed;
    1885             : }
    1886             : 
    1887             : /// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
    1888             : /// into the preceding stack restore so it directly restores the value of LR
    1889             : /// into pc. Returns true if the block was changed.
    1890             : ///   ldmfd sp!, {..., lr}
    1891             : ///   bx lr
    1892             : /// or
    1893             : ///   ldmfd sp!, {..., lr}
    1894             : ///   mov pc, lr
    1895             : /// =>
    1896             : ///   ldmfd sp!, {..., pc}
    1897       14022 : bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
    1898             :   // Thumb1 LDM doesn't allow high registers.
    1899       14022 :   if (isThumb1) return false;
    1900       12433 :   if (MBB.empty()) return false;
    1901             :   // Only a return that has at least one instruction in front of it is a candidate.
    1902       12392 :   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
    1903       12392 :   if (MBBI != MBB.begin() && MBBI != MBB.end() &&
    1904       11140 :       (MBBI->getOpcode() == ARM::BX_RET ||
    1905        4988 :        MBBI->getOpcode() == ARM::tBX_RET ||
    1906             :        MBBI->getOpcode() == ARM::MOVPCLR)) {
    1907        6152 :     MachineBasicBlock::iterator PrevI = std::prev(MBBI);
    1908             :     // Ignore any debug instructions.
    1909           7 :     while (PrevI->isDebugInstr() && PrevI != MBB.begin())
    1910             :       --PrevI;
    1911             :     MachineInstr &PrevMI = *PrevI;
    1912             :     unsigned Opcode = PrevMI.getOpcode();
    1913        6152 :     if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
    1914        6144 :         Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
    1915        6144 :         Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
    1916          25 :       MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
    1917          25 :       if (MO.getReg() != ARM::LR)
    1918          25 :         return false;
    1919          20 :       unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
    1920             :       assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
    1921             :               Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
    1922          20 :       PrevMI.setDesc(TII->get(NewOpc));
    1923          20 :       MO.setReg(ARM::PC);
    1924          20 :       PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
    1925          20 :       MBB.erase(MBBI);
    1926             :       // We now restore LR into PC so it is not live-out of the return block
    1927             :       // anymore: Clear the CSI Restored bit.
    1928          20 :       MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
    1929             :       // CSI should be fixed after PrologEpilog Insertion
    1930             :       assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
    1931          20 :       for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
    1932          20 :         if (Info.getReg() == ARM::LR) {
    1933             :           Info.setRestored(false);
    1934             :           break;
    1935             :         }
    1936             :       }
    1937          20 :       return true;
    1938             :     }
    1939             :   }
    1940             :   return false;
    1941             : }
    1942             : /// Fold a tMOVr that defines LR and sits right before a tBX_RET into one tBX through the killed source; true if done.
    1943           0 : bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
    1944           0 :   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
    1945           0 :   if (MBBI == MBB.begin() || MBBI == MBB.end() ||
    1946           0 :       MBBI->getOpcode() != ARM::tBX_RET)
    1947           0 :     return false;
    1948             :   // The instruction directly in front of the return must be a tMOVr that defines LR.
    1949           0 :   MachineBasicBlock::iterator Prev = MBBI;
    1950             :   --Prev;
    1951           0 :   if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
    1952           0 :     return false;
    1953             :   // Bind each operand by reference (no copy) and skip non-register operands, for which isKill() is not valid.
    1954           0 :   for (const MachineOperand &Use : Prev->uses())
    1955           0 :     if (Use.isReg() && Use.isKill()) {
    1956             :       assert(STI->hasV4TOps());
    1957           0 :       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
    1958           0 :           .addReg(Use.getReg(), RegState::Kill)
    1959           0 :           .add(predOps(ARMCC::AL))
    1960             :           .copyImplicitOps(*MBBI);
    1961           0 :       MBB.erase(MBBI);
    1962           0 :       MBB.erase(Prev);
    1963           0 :       return true;
    1964             :     }
    1965             : 
    1966           0 :   llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
    1967             : }
    1968             : /// Pass entry point: runs the per-block transforms on every block of \p Fn; returns true if any reported a change.
    1969       13379 : bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
    1970       13379 :   if (skipFunction(Fn.getFunction()))
    1971             :     return false;
    1972             :   // Cache the function and the subtarget-derived objects in members for the helpers.
    1973       13371 :   MF = &Fn;
    1974       13371 :   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
    1975       13371 :   TL = STI->getTargetLowering();
    1976       13371 :   AFI = Fn.getInfo<ARMFunctionInfo>();
    1977       13371 :   TII = STI->getInstrInfo();
    1978       13371 :   TRI = STI->getRegisterInfo();
    1979             : 
    1980       13371 :   RegClassInfoValid = false;
    1981       13371 :   isThumb2 = AFI->isThumb2Function();
    1982       25589 :   isThumb1 = AFI->isThumbFunction() && !isThumb2;
    1983             :   // Per block: always try LDM/STM formation; return merging only with v5T ops; mov+bx combining only in Thumb1.
    1984             :   bool Modified = false;
    1985       31768 :   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    1986             :        ++MFI) {
    1987             :     MachineBasicBlock &MBB = *MFI;
    1988       18397 :     Modified |= LoadStoreMultipleOpti(MBB);
    1989       18397 :     if (STI->hasV5TOps())
    1990       14022 :       Modified |= MergeReturnIntoLDM(MBB);
    1991       18397 :     if (isThumb1)
    1992        2164 :       Modified |= CombineMovBx(MBB);
    1993             :   }
    1994             : 
    1995       13371 :   Allocator.DestroyAll();
    1996       13371 :   return Modified;
    1997             : }
    1998             : 
    1999             : #define ARM_PREALLOC_LOAD_STORE_OPT_NAME                                       \
    2000             :   "ARM pre- register allocation load / store optimization pass"
    2001             : 
    2002             : namespace {
    2003             : 
    2004             :   /// Pre- register allocation pass that moves loads / stores from consecutive
    2005             :   /// locations close together to make it more likely they will be combined later.
    2006             :   struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    2007             :     static char ID;
    2008             :     // The members below are not initialized here; runOnMachineFunction assigns all of them before use.
    2009             :     AliasAnalysis *AA;
    2010             :     const DataLayout *TD;
    2011             :     const TargetInstrInfo *TII;
    2012             :     const TargetRegisterInfo *TRI;
    2013             :     const ARMSubtarget *STI;
    2014             :     MachineRegisterInfo *MRI;
    2015             :     MachineFunction *MF;
    2016             : 
    2017        2571 :     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
    2018             : 
    2019             :     bool runOnMachineFunction(MachineFunction &Fn) override;
    2020             : 
    2021        2559 :     StringRef getPassName() const override {
    2022        2559 :       return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
    2023             :     }
    2024             :     // Requires alias analysis results in addition to the MachineFunctionPass defaults.
    2025        2559 :     void getAnalysisUsage(AnalysisUsage &AU) const override {
    2026             :       AU.addRequired<AAResultsWrapperPass>();
    2027        2559 :       MachineFunctionPass::getAnalysisUsage(AU);
    2028        2559 :     }
    2029             : 
    2030             :   private:
    2031             :     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
    2032             :                           unsigned &NewOpc, unsigned &FirstReg,
    2033             :                           unsigned &SecondReg, unsigned &BaseReg,
    2034             :                           int &Offset,
    2035             :                           unsigned &PredReg, ARMCC::CondCodes &Pred,
    2036             :                           bool &isT2);
    2037             :     bool RescheduleOps(MachineBasicBlock *MBB,
    2038             :                        SmallVectorImpl<MachineInstr *> &Ops,
    2039             :                        unsigned Base, bool isLd,
    2040             :                        DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    2041             :     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
    2042             :   };
    2043             : 
    2044             : } // end anonymous namespace
    2045             : 
    2046             : char ARMPreAllocLoadStoreOpt::ID = 0;
    2047             : 
    2048      199024 : INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
    2049             :                 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
    2050             : /// Pass entry point: reschedules loads / stores in every block of \p Fn; returns true if any block changed.
    2051       13378 : bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
    2052       13378 :   if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
    2053          27 :     return false;
    2054             :   // Populate the per-function members used by the helpers.
    2055       13351 :   TD = &Fn.getDataLayout();
    2056       13351 :   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
    2057       13351 :   TII = STI->getInstrInfo();
    2058       13351 :   TRI = STI->getRegisterInfo();
    2059       13351 :   MRI = &Fn.getRegInfo();
    2060       13351 :   MF  = &Fn;
    2061       13351 :   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    2062             :   // Note: despite its name, MFI below is a basic block of Fn.
    2063             :   bool Modified = false;
    2064       31923 :   for (MachineBasicBlock &MFI : Fn)
    2065       18572 :     Modified |= RescheduleLoadStoreInstrs(&MFI);
    2066             : 
    2067             :   return Modified;
    2068             : }
    2069             : /// Scan the instructions strictly between \p I and \p E for anything that blocks moving the ops in \p MemOps.
    2070         697 : static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
    2071             :                                       MachineBasicBlock::iterator I,
    2072             :                                       MachineBasicBlock::iterator E,
    2073             :                                       SmallPtrSetImpl<MachineInstr*> &MemOps,
    2074             :                                       SmallSet<unsigned, 4> &MemRegs,
    2075             :                                       const TargetRegisterInfo *TRI,
    2076             :                                       AliasAnalysis *AA) {
    2077             :   // Are there stores / loads / calls between them?
    2078         697 :   SmallSet<unsigned, 4> AddedRegPressure;
    2079        1766 :   while (++I != E) {
    2080        1099 :     if (I->isDebugInstr() || MemOps.count(&*I))
    2081         774 :       continue;
    2082         652 :     if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
    2083           9 :       return false;
    2084         317 :     if (I->mayStore() || (!isLd && I->mayLoad()))
    2085         103 :       for (MachineInstr *MemOp : MemOps)
    2086          86 :         if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
    2087          22 :           return false;
    2088        1847 :     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
    2089        1552 :       MachineOperand &MO = I->getOperand(j);
    2090        1552 :       if (!MO.isReg())
    2091         448 :         continue;
    2092        1104 :       unsigned Reg = MO.getReg();
    2093        1104 :       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
    2094           0 :         return false;
    2095        1104 :       if (Reg != Base && !MemRegs.count(Reg))
    2096         862 :         AddedRegPressure.insert(Reg);
    2097             :     }
    2098             :   }
    2099             :   // AddedRegPressure now holds every register operand (def or use) seen above that is neither Base nor in MemRegs.
    2100             :   // Estimate register pressure increase due to the transformation.
    2101         666 :   if (MemRegs.size() <= 4)
    2102             :     // Ok if we are moving a small number of instructions.
    2103             :     return true;
    2104          84 :   return AddedRegPressure.size() <= MemRegs.size() * 2;
    2105             : }
    2106             : /// Check whether \p Op0 and \p Op1 can become one LDRD/STRD; on success the reference parameters describe it.
    2107             : bool
    2108         389 : ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
    2109             :                                           DebugLoc &dl, unsigned &NewOpc,
    2110             :                                           unsigned &FirstReg,
    2111             :                                           unsigned &SecondReg,
    2112             :                                           unsigned &BaseReg, int &Offset,
    2113             :                                           unsigned &PredReg,
    2114             :                                           ARMCC::CondCodes &Pred,
    2115             :                                           bool &isT2) {
    2116             :   // Make sure we're allowed to generate LDRD/STRD.
    2117         389 :   if (!STI->hasV5TEOps())
    2118             :     return false;
    2119             :   // Map Op0's single-word opcode to the doubleword opcode; only Op0's opcode is inspected here.
    2120             :   // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
    2121             :   unsigned Scale = 1;
    2122         333 :   unsigned Opcode = Op0->getOpcode();
    2123         333 :   if (Opcode == ARM::LDRi12) {
    2124          37 :     NewOpc = ARM::LDRD;
    2125         296 :   } else if (Opcode == ARM::STRi12) {
    2126          68 :     NewOpc = ARM::STRD;
    2127         228 :   } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    2128          58 :     NewOpc = ARM::t2LDRDi8;
    2129             :     Scale = 4;
    2130          58 :     isT2 = true;
    2131         170 :   } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    2132          98 :     NewOpc = ARM::t2STRDi8;
    2133             :     Scale = 4;
    2134          98 :     isT2 = true;
    2135             :   } else {
    2136             :     return false;
    2137             :   }
    2138             :   // NOTE(review): from here on a false return may leave NewOpc, isT2, Offset, FirstReg and SecondReg already written.
    2139             :   // Make sure the base address satisfies i64 ld / st alignment requirement.
    2140             :   // At the moment, we ignore the memoryoperand's value.
    2141             :   // If we want to use AliasAnalysis, we should check it accordingly.
    2142         522 :   if (!Op0->hasOneMemOperand() ||
    2143         261 :       (*Op0->memoperands_begin())->isVolatile())
    2144           0 :     return false;
    2145             : 
    2146         261 :   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
    2147         261 :   const Function &Func = MF->getFunction();
    2148         261 :   unsigned ReqAlign = STI->hasV6Ops()
    2149         261 :     ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext()))
    2150             :     : 8;  // Pre-v6 need 8-byte align
    2151         261 :   if (Align < ReqAlign)
    2152             :     return false;
    2153             :   // NOTE(review): isT2 is only ever set to true in this function, so the test below relies on the caller passing false in.
    2154             :   // Then make sure the immediate offset fits.
    2155         231 :   int OffImm = getMemoryOpOffset(*Op0);
    2156         231 :   if (isT2) {
    2157         132 :     int Limit = (1 << 8) * Scale;
    2158         132 :     if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
    2159             :       return false;
    2160         131 :     Offset = OffImm;
    2161             :   } else {
    2162             :     ARM_AM::AddrOpc AddSub = ARM_AM::add;
    2163          99 :     if (OffImm < 0) {
    2164             :       AddSub = ARM_AM::sub;
    2165           0 :       OffImm = - OffImm;
    2166             :     }
    2167          99 :     int Limit = (1 << 8) * Scale;
    2168          99 :     if (OffImm >= Limit || (OffImm & (Scale-1)))
    2169             :       return false;
    2170          99 :     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
    2171             :   }
    2172         230 :   FirstReg = Op0->getOperand(0).getReg();
    2173         230 :   SecondReg = Op1->getOperand(0).getReg();
    2174         230 :   if (FirstReg == SecondReg)
    2175             :     return false;
    2176         225 :   BaseReg = Op0->getOperand(1).getReg();
    2177         225 :   Pred = getInstrPredicate(*Op0, PredReg);
    2178             :   dl = Op0->getDebugLoc();
    2179         225 :   return true;
    2180             : }
    2181             : /// Sort \p Ops (loads or stores off \p Base) by offset, then move runs of adjacent-offset ops together, merging a run of exactly two into one pair instruction when CanFormLdStDWord allows. \p Ops is consumed. Returns true if anything was moved or merged.
    2182         681 : bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
    2183             :                                  SmallVectorImpl<MachineInstr *> &Ops,
    2184             :                                  unsigned Base, bool isLd,
    2185             :                                  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
    2186             :   bool RetVal = false; // Set once any run is moved or merged.
    2187             : 
    2188             :   // Sort by descending offset, so the lowest offset ends up at the back of Ops.
    2189             :   llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
    2190           0 :     int LOffset = getMemoryOpOffset(*LHS);
    2191           0 :     int ROffset = getMemoryOpOffset(*RHS);
    2192             :     assert(LHS == RHS || LOffset != ROffset); // Distinct ops must not share an offset.
    2193           0 :     return LOffset > ROffset;
    2194             :   });
    2195             : 
    2196             :   // The loads / stores of the same base are in order. Scan them from first to
    2197             :   // last and check for the following:
    2198             :   // 1. Any def of base.
    2199             :   // 2. Any gaps.
    2200        1430 :   while (Ops.size() > 1) {
    2201             :     unsigned FirstLoc = ~0U; // Earliest location among the ops gathered below.
    2202             :     unsigned LastLoc = 0; // Latest location among the ops gathered below.
    2203             :     MachineInstr *FirstOp = nullptr; // Op recorded at FirstLoc.
    2204             :     MachineInstr *LastOp = nullptr; // Op recorded at LastLoc.
    2205             :     int LastOffset = 0;
    2206             :     unsigned LastOpcode = 0;
    2207             :     unsigned LastBytes = 0;
    2208             :     unsigned NumMove = 0; // Number of ops gathered into the current run.
    2209        3012 :     for (int i = Ops.size() - 1; i >= 0; --i) { // Walk from the back (lowest offset) upward.
    2210             :       // Make sure each operation has the same kind.
    2211        2367 :       MachineInstr *Op = Ops[i];
    2212             :       unsigned LSMOpcode
    2213        2367 :         = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
    2214        2367 :       if (LastOpcode && LSMOpcode != LastOpcode)
    2215             :         break;
    2216             : 
    2217             :       // Check that we have a continuous set of offsets.
    2218        2364 :       int Offset = getMemoryOpOffset(*Op);
    2219        2364 :       unsigned Bytes = getLSMultipleTransferSize(Op);
    2220        2364 :       if (LastBytes) {
    2221        1615 :         if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
    2222             :           break;
    2223             :       }
    2224             : 
    2225             :       // Don't try to reschedule too many instructions.
    2226        2302 :       if (NumMove == 8) // FIXME: Tune this limit.
    2227             :         break;
    2228             : 
    2229             :       // Found a mergeable instruction; save information about it.
    2230        2263 :       ++NumMove;
    2231             :       LastOffset = Offset;
    2232             :       LastBytes = Bytes;
    2233             :       LastOpcode = LSMOpcode;
    2234             : 
    2235        2263 :       unsigned Loc = MI2LocMap[Op];
    2236        2263 :       if (Loc <= FirstLoc) {
    2237             :         FirstLoc = Loc;
    2238        1594 :         FirstOp = Op;
    2239             :       }
    2240        2263 :       if (Loc >= LastLoc) {
    2241             :         LastLoc = Loc;
    2242        1389 :         LastOp = Op;
    2243             :       }
    2244             :     }
    2245             : 
    2246         749 :     if (NumMove <= 1)
    2247             :       Ops.pop_back(); // Nothing to group the back op with; drop it and retry.
    2248             :     else {
    2249             :       SmallPtrSet<MachineInstr*, 4> MemOps;
    2250         707 :       SmallSet<unsigned, 4> MemRegs;
    2251        3635 :       for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) { // The run is the last NumMove entries.
    2252        2221 :         MemOps.insert(Ops[i]);
    2253        2221 :         MemRegs.insert(Ops[i]->getOperand(0).getReg());
    2254             :       }
    2255             : 
    2256             :       // Be conservative, if the instructions are too far apart, don't
    2257             :       // move them. We want to limit the increase of register pressure.
    2258         707 :       bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
    2259         707 :       if (DoMove)
    2260        1394 :         DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
    2261             :                                            MemOps, MemRegs, TRI, AA);
    2262         707 :       if (!DoMove) {
    2263         181 :         for (unsigned i = 0; i != NumMove; ++i)
    2264             :           Ops.pop_back(); // Discard the whole run without touching the block.
    2265             :       } else {
    2266             :         // This is the new location for the loads / stores: loads gather at the earliest op, stores at the latest.
    2267         664 :         MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
    2268        3363 :         while (InsertPos != MBB->end() &&
    2269        1678 :                (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
    2270             :           ++InsertPos; // Step past ops in the run and past debug instructions.
    2271             : 
    2272             :         // If we are moving a pair of loads / stores, see if it makes sense
    2273             :         // to try to allocate a pair of registers that can form register pairs.
    2274         664 :         MachineInstr *Op0 = Ops.back(); // Lowest-offset op of the run.
    2275         664 :         MachineInstr *Op1 = Ops[Ops.size()-2]; // Op with the next higher offset.
    2276         664 :         unsigned FirstReg = 0, SecondReg = 0;
    2277         664 :         unsigned BaseReg = 0, PredReg = 0;
    2278         664 :         ARMCC::CondCodes Pred = ARMCC::AL;
    2279         664 :         bool isT2 = false;
    2280         664 :         unsigned NewOpc = 0;
    2281         664 :         int Offset = 0;
    2282         664 :         DebugLoc dl;
    2283         664 :         if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
    2284             :                                              FirstReg, SecondReg, BaseReg,
    2285             :                                              Offset, PredReg, Pred, isT2)) {
    2286             :           Ops.pop_back();
    2287             :           Ops.pop_back();
    2288             : 
    2289         225 :           const MCInstrDesc &MCID = TII->get(NewOpc);
    2290         225 :           const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
    2291         225 :           MRI->constrainRegClass(FirstReg, TRC);
    2292         225 :           MRI->constrainRegClass(SecondReg, TRC);
    2293             : 
    2294             :           // Form the pair instruction.
    2295         225 :           if (isLd) {
    2296          83 :             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
    2297          83 :               .addReg(FirstReg, RegState::Define)
    2298          83 :               .addReg(SecondReg, RegState::Define)
    2299          83 :               .addReg(BaseReg);
    2300             :             // FIXME: We're converting from LDRi12 to an insn that still
    2301             :             // uses addrmode2, so we need an explicit offset reg. It should
    2302             :             // always be reg0 since we're transforming LDRi12s.
    2303          83 :             if (!isT2)
    2304          34 :               MIB.addReg(0);
    2305          83 :             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
    2306          83 :             MIB.cloneMergedMemRefs({Op0, Op1});
    2307             :             LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
    2308             :             ++NumLDRDFormed;
    2309             :           } else {
    2310         142 :             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
    2311         142 :               .addReg(FirstReg)
    2312         142 :               .addReg(SecondReg)
    2313         142 :               .addReg(BaseReg);
    2314             :             // FIXME: We're converting from STRi12 to an insn that still
    2315             :             // uses addrmode2, so we need an explicit offset reg. It should
    2316             :             // always be reg0 since we're transforming STRi12s.
    2317         142 :             if (!isT2)
    2318          64 :               MIB.addReg(0);
    2319         142 :             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
    2320         142 :             MIB.cloneMergedMemRefs({Op0, Op1});
    2321             :             LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
    2322             :             ++NumSTRDFormed;
    2323             :           }
    2324             :           MBB->erase(Op0); // The two originals are superseded by the instruction built above.
    2325             :           MBB->erase(Op1);
    2326             : 
    2327         225 :           if (!isT2) {
    2328             :             // Add register allocation hints to form register pairs.
    2329          98 :             MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
    2330          98 :             MRI->setRegAllocationHint(SecondReg,  ARMRI::RegPairOdd, FirstReg);
    2331             :           }
    2332             :         } else {
    2333        2072 :           for (unsigned i = 0; i != NumMove; ++i) {
    2334        1633 :             MachineInstr *Op = Ops.back();
    2335             :             Ops.pop_back();
    2336        1633 :             MBB->splice(InsertPos, MBB, Op); // Relocate Op to InsertPos within the same block.
    2337             :           }
    2338             :         }
    2339             : 
    2340             :         NumLdStMoved += NumMove;
    2341             :         RetVal = true;
    2342             :       }
    2343             :     }
    2344             :   }
    2345             : 
    2346         681 :   return RetVal;
    2347             : }
    2348             : /// Scan \p MBB region by region, bucket memory ops whose predicate is ARMCC::AL by base register (loads and stores separately), and pass every bucket holding more than one op to RescheduleOps. Returns true if any such call reported a change.
    2349             : bool
    2350       18572 : ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
    2351             :   bool RetVal = false;
    2352             : 
    2353             :   DenseMap<MachineInstr*, unsigned> MI2LocMap; // Instruction -> location number assigned below.
    2354             :   DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2LdsMap; // Base register -> loads seen in the current region.
    2355             :   DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2StsMap; // Base register -> stores seen in the current region.
    2356             :   SmallVector<unsigned, 4> LdBases; // Load base registers, in first-seen order.
    2357             :   SmallVector<unsigned, 4> StBases; // Store base registers, in first-seen order.
    2358             : 
    2359             :   unsigned Loc = 0; // Last location number handed out.
    2360             :   MachineBasicBlock::iterator MBBI = MBB->begin();
    2361             :   MachineBasicBlock::iterator E = MBB->end();
    2362       45030 :   while (MBBI != E) { // One iteration per region.
    2363      169450 :     for (; MBBI != E; ++MBBI) {
    2364             :       MachineInstr &MI = *MBBI;
    2365      329024 :       if (MI.isCall() || MI.isTerminator()) {
    2366             :         // Stop at barriers: a call or terminator ends the current region.
    2367             :         ++MBBI; // The next region starts after the barrier.
    2368       25014 :         break;
    2369             :       }
    2370             : 
    2371             :       if (!MI.isDebugInstr())
    2372      142889 :         MI2LocMap[&MI] = ++Loc;
    2373             : 
    2374      143015 :       if (!isMemoryOp(MI))
    2375      136701 :         continue;
    2376        6314 :       unsigned PredReg = 0;
    2377        6314 :       if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
    2378             :         continue; // Only ops with predicate AL are considered.
    2379             : 
    2380        6314 :       int Opc = MI.getOpcode();
    2381             :       bool isLd = isLoadSingle(Opc);
    2382        6314 :       unsigned Base = MI.getOperand(1).getReg();
    2383        6314 :       int Offset = getMemoryOpOffset(MI);
    2384             : 
    2385             :       bool StopHere = false; // Set when this base+offset is already in the bucket.
    2386        6314 :       if (isLd) {
    2387             :         DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
    2388        3583 :           Base2LdsMap.find(Base);
    2389        3583 :         if (BI != Base2LdsMap.end()) {
    2390        2449 :           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
    2391        3838 :             if (Offset == getMemoryOpOffset(*BI->second[i])) {
    2392             :               StopHere = true;
    2393             :               break;
    2394             :             }
    2395             :           }
    2396         551 :           if (!StopHere)
    2397         530 :             BI->second.push_back(&MI);
    2398             :         } else {
    2399        3032 :           Base2LdsMap[Base].push_back(&MI);
    2400        3032 :           LdBases.push_back(Base);
    2401             :         }
    2402             :       } else {
    2403             :         DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
    2404        2731 :           Base2StsMap.find(Base);
    2405        2731 :         if (BI != Base2StsMap.end()) {
    2406       18177 :           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
    2407       34178 :             if (Offset == getMemoryOpOffset(*BI->second[i])) {
    2408             :               StopHere = true;
    2409             :               break;
    2410             :             }
    2411             :           }
    2412        1090 :           if (!StopHere)
    2413        1088 :             BI->second.push_back(&MI);
    2414             :         } else {
    2415        1641 :           Base2StsMap[Base].push_back(&MI);
    2416        1641 :           StBases.push_back(Base);
    2417             :         }
    2418             :       }
    2419             : 
    2420        1641 :       if (StopHere) {
    2421             :         // Found a duplicate (a base+offset combination that's seen earlier).
    2422             :         // Backtrack: undo the numbering; MBBI is not advanced, so MI opens the next region.
    2423          23 :         --Loc;
    2424          23 :         break;
    2425             :       }
    2426             :     }
    2427             : 
    2428             :     // Re-schedule loads.
    2429       29490 :     for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
    2430        6064 :       unsigned Base = LdBases[i];
    2431        3032 :       SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
    2432        3032 :       if (Lds.size() > 1)
    2433         264 :         RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    2434             :     }
    2435             : 
    2436             :     // Re-schedule stores.
    2437       28099 :     for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
    2438        3282 :       unsigned Base = StBases[i];
    2439        1641 :       SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
    2440        1641 :       if (Sts.size() > 1)
    2441         417 :         RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    2442             :     }
    2443             : 
    2444       26458 :     if (MBBI != E) { // Another region follows; start it with empty buckets.
    2445        8160 :       Base2LdsMap.clear();
    2446        8160 :       Base2StsMap.clear();
    2447             :       LdBases.clear();
    2448             :       StBases.clear();
    2449             :     }
    2450             :   }
    2451             : 
    2452       18572 :   return RetVal;
    2453             : }
    2454             : 
    2455             : /// Returns a newly allocated load / store optimization pass: ARMPreAllocLoadStoreOpt when \p PreAlloc is true, ARMLoadStoreOpt otherwise.
    2456        5138 : FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
    2457        5138 :   if (PreAlloc)
    2458        5138 :     return new ARMPreAllocLoadStoreOpt();
    2459        2569 :   return new ARMLoadStoreOpt();
    2460             : }

Generated by: LCOV version 1.13