LLVM  3.7.0
ARMLoadStoreOptimizer.cpp
Go to the documentation of this file.
1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARM.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "arm-ldst-opt"
50 
51 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
52 STATISTIC(NumSTMGened , "Number of stm instructions generated");
53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
62 
63 namespace {
64  /// Post- register allocation pass the combine load / store instructions to
65  /// form ldm / stm instructions.
66  struct ARMLoadStoreOpt : public MachineFunctionPass {
67  static char ID;
68  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
69 
70  const MachineFunction *MF;
71  const TargetInstrInfo *TII;
72  const TargetRegisterInfo *TRI;
73  const MachineRegisterInfo *MRI;
74  const ARMSubtarget *STI;
75  const TargetLowering *TL;
76  ARMFunctionInfo *AFI;
77  LivePhysRegs LiveRegs;
78  RegisterClassInfo RegClassInfo;
80  bool LiveRegsValid;
81  bool RegClassInfoValid;
82  bool isThumb1, isThumb2;
83 
84  bool runOnMachineFunction(MachineFunction &Fn) override;
85 
86  const char *getPassName() const override {
87  return "ARM load / store optimization pass";
88  }
89 
90  private:
91  /// A set of load/store MachineInstrs with same base register sorted by
92  /// offset.
93  struct MemOpQueueEntry {
95  int Offset; ///< Load/Store offset.
96  unsigned Position; ///< Position as counted from end of basic block.
97  MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
98  : MI(MI), Offset(Offset), Position(Position) {}
99  };
100  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
101 
102  /// A set of MachineInstrs that fulfill (nearly all) conditions to get
103  /// merged into a LDM/STM.
104  struct MergeCandidate {
105  /// List of instructions ordered by load/store offset.
107  /// Index in Instrs of the instruction being latest in the schedule.
108  unsigned LatestMIIdx;
109  /// Index in Instrs of the instruction being earliest in the schedule.
110  unsigned EarliestMIIdx;
111  /// Index into the basic block where the merged instruction will be
112  /// inserted. (See MemOpQueueEntry.Position)
113  unsigned InsertPos;
114  /// Whether the instructions can be merged into a ldm/stm instruction.
115  bool CanMergeToLSMulti;
116  /// Whether the instructions can be merged into a ldrd/strd instruction.
117  bool CanMergeToLSDouble;
118  };
121 
122  void moveLiveRegsBefore(const MachineBasicBlock &MBB,
124  unsigned findFreeReg(const TargetRegisterClass &RegClass);
125  void UpdateBaseRegUses(MachineBasicBlock &MBB,
127  DebugLoc DL, unsigned Base, unsigned WordOffset,
128  ARMCC::CondCodes Pred, unsigned PredReg);
129  MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
130  MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
131  bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
132  DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
133  MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
134  MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
135  bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
136  DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
137  void FormCandidates(const MemOpQueue &MemOps);
138  MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
139  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
141  bool MergeBaseUpdateLoadStore(MachineInstr *MI);
142  bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
143  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
144  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
145  };
146  char ARMLoadStoreOpt::ID = 0;
147 }
148 
149 static bool definesCPSR(const MachineInstr *MI) {
150  for (const auto &MO : MI->operands()) {
151  if (!MO.isReg())
152  continue;
153  if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
154  // If the instruction has live CPSR def, then it's not safe to fold it
155  // into load / store.
156  return true;
157  }
158 
159  return false;
160 }
161 
162 static int getMemoryOpOffset(const MachineInstr *MI) {
163  unsigned Opcode = MI->getOpcode();
164  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
165  unsigned NumOperands = MI->getDesc().getNumOperands();
166  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
167 
168  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
169  Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
170  Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
171  Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
172  return OffField;
173 
174  // Thumb1 immediate offsets are scaled by 4
175  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
176  Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
177  return OffField * 4;
178 
179  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
180  : ARM_AM::getAM5Offset(OffField) * 4;
181  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
182  : ARM_AM::getAM5Op(OffField);
183 
184  if (Op == ARM_AM::sub)
185  return -Offset;
186 
187  return Offset;
188 }
189 
191  return MI.getOperand(1);
192 }
193 
195  return MI.getOperand(0);
196 }
197 
198 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
199  switch (Opcode) {
200  default: llvm_unreachable("Unhandled opcode!");
201  case ARM::LDRi12:
202  ++NumLDMGened;
203  switch (Mode) {
204  default: llvm_unreachable("Unhandled submode!");
205  case ARM_AM::ia: return ARM::LDMIA;
206  case ARM_AM::da: return ARM::LDMDA;
207  case ARM_AM::db: return ARM::LDMDB;
208  case ARM_AM::ib: return ARM::LDMIB;
209  }
210  case ARM::STRi12:
211  ++NumSTMGened;
212  switch (Mode) {
213  default: llvm_unreachable("Unhandled submode!");
214  case ARM_AM::ia: return ARM::STMIA;
215  case ARM_AM::da: return ARM::STMDA;
216  case ARM_AM::db: return ARM::STMDB;
217  case ARM_AM::ib: return ARM::STMIB;
218  }
219  case ARM::tLDRi:
220  case ARM::tLDRspi:
221  // tLDMIA is writeback-only - unless the base register is in the input
222  // reglist.
223  ++NumLDMGened;
224  switch (Mode) {
225  default: llvm_unreachable("Unhandled submode!");
226  case ARM_AM::ia: return ARM::tLDMIA;
227  }
228  case ARM::tSTRi:
229  case ARM::tSTRspi:
230  // There is no non-writeback tSTMIA either.
231  ++NumSTMGened;
232  switch (Mode) {
233  default: llvm_unreachable("Unhandled submode!");
234  case ARM_AM::ia: return ARM::tSTMIA_UPD;
235  }
236  case ARM::t2LDRi8:
237  case ARM::t2LDRi12:
238  ++NumLDMGened;
239  switch (Mode) {
240  default: llvm_unreachable("Unhandled submode!");
241  case ARM_AM::ia: return ARM::t2LDMIA;
242  case ARM_AM::db: return ARM::t2LDMDB;
243  }
244  case ARM::t2STRi8:
245  case ARM::t2STRi12:
246  ++NumSTMGened;
247  switch (Mode) {
248  default: llvm_unreachable("Unhandled submode!");
249  case ARM_AM::ia: return ARM::t2STMIA;
250  case ARM_AM::db: return ARM::t2STMDB;
251  }
252  case ARM::VLDRS:
253  ++NumVLDMGened;
254  switch (Mode) {
255  default: llvm_unreachable("Unhandled submode!");
256  case ARM_AM::ia: return ARM::VLDMSIA;
257  case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
258  }
259  case ARM::VSTRS:
260  ++NumVSTMGened;
261  switch (Mode) {
262  default: llvm_unreachable("Unhandled submode!");
263  case ARM_AM::ia: return ARM::VSTMSIA;
264  case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
265  }
266  case ARM::VLDRD:
267  ++NumVLDMGened;
268  switch (Mode) {
269  default: llvm_unreachable("Unhandled submode!");
270  case ARM_AM::ia: return ARM::VLDMDIA;
271  case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
272  }
273  case ARM::VSTRD:
274  ++NumVSTMGened;
275  switch (Mode) {
276  default: llvm_unreachable("Unhandled submode!");
277  case ARM_AM::ia: return ARM::VSTMDIA;
278  case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
279  }
280  }
281 }
282 
284  switch (Opcode) {
285  default: llvm_unreachable("Unhandled opcode!");
286  case ARM::LDMIA_RET:
287  case ARM::LDMIA:
288  case ARM::LDMIA_UPD:
289  case ARM::STMIA:
290  case ARM::STMIA_UPD:
291  case ARM::tLDMIA:
292  case ARM::tLDMIA_UPD:
293  case ARM::tSTMIA_UPD:
294  case ARM::t2LDMIA_RET:
295  case ARM::t2LDMIA:
296  case ARM::t2LDMIA_UPD:
297  case ARM::t2STMIA:
298  case ARM::t2STMIA_UPD:
299  case ARM::VLDMSIA:
300  case ARM::VLDMSIA_UPD:
301  case ARM::VSTMSIA:
302  case ARM::VSTMSIA_UPD:
303  case ARM::VLDMDIA:
304  case ARM::VLDMDIA_UPD:
305  case ARM::VSTMDIA:
306  case ARM::VSTMDIA_UPD:
307  return ARM_AM::ia;
308 
309  case ARM::LDMDA:
310  case ARM::LDMDA_UPD:
311  case ARM::STMDA:
312  case ARM::STMDA_UPD:
313  return ARM_AM::da;
314 
315  case ARM::LDMDB:
316  case ARM::LDMDB_UPD:
317  case ARM::STMDB:
318  case ARM::STMDB_UPD:
319  case ARM::t2LDMDB:
320  case ARM::t2LDMDB_UPD:
321  case ARM::t2STMDB:
322  case ARM::t2STMDB_UPD:
323  case ARM::VLDMSDB_UPD:
324  case ARM::VSTMSDB_UPD:
325  case ARM::VLDMDDB_UPD:
326  case ARM::VSTMDDB_UPD:
327  return ARM_AM::db;
328 
329  case ARM::LDMIB:
330  case ARM::LDMIB_UPD:
331  case ARM::STMIB:
332  case ARM::STMIB_UPD:
333  return ARM_AM::ib;
334  }
335 }
336 
337 static bool isT1i32Load(unsigned Opc) {
338  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
339 }
340 
341 static bool isT2i32Load(unsigned Opc) {
342  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
343 }
344 
345 static bool isi32Load(unsigned Opc) {
346  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
347 }
348 
349 static bool isT1i32Store(unsigned Opc) {
350  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
351 }
352 
353 static bool isT2i32Store(unsigned Opc) {
354  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
355 }
356 
357 static bool isi32Store(unsigned Opc) {
358  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
359 }
360 
361 static bool isLoadSingle(unsigned Opc) {
362  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
363 }
364 
365 static unsigned getImmScale(unsigned Opc) {
366  switch (Opc) {
367  default: llvm_unreachable("Unhandled opcode!");
368  case ARM::tLDRi:
369  case ARM::tSTRi:
370  case ARM::tLDRspi:
371  case ARM::tSTRspi:
372  return 1;
373  case ARM::tLDRHi:
374  case ARM::tSTRHi:
375  return 2;
376  case ARM::tLDRBi:
377  case ARM::tSTRBi:
378  return 4;
379  }
380 }
381 
382 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
383  switch (MI->getOpcode()) {
384  default: return 0;
385  case ARM::LDRi12:
386  case ARM::STRi12:
387  case ARM::tLDRi:
388  case ARM::tSTRi:
389  case ARM::tLDRspi:
390  case ARM::tSTRspi:
391  case ARM::t2LDRi8:
392  case ARM::t2LDRi12:
393  case ARM::t2STRi8:
394  case ARM::t2STRi12:
395  case ARM::VLDRS:
396  case ARM::VSTRS:
397  return 4;
398  case ARM::VLDRD:
399  case ARM::VSTRD:
400  return 8;
401  case ARM::LDMIA:
402  case ARM::LDMDA:
403  case ARM::LDMDB:
404  case ARM::LDMIB:
405  case ARM::STMIA:
406  case ARM::STMDA:
407  case ARM::STMDB:
408  case ARM::STMIB:
409  case ARM::tLDMIA:
410  case ARM::tLDMIA_UPD:
411  case ARM::tSTMIA_UPD:
412  case ARM::t2LDMIA:
413  case ARM::t2LDMDB:
414  case ARM::t2STMIA:
415  case ARM::t2STMDB:
416  case ARM::VLDMSIA:
417  case ARM::VSTMSIA:
418  return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
419  case ARM::VLDMDIA:
420  case ARM::VSTMDIA:
421  return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
422  }
423 }
424 
425 /// Update future uses of the base register with the offset introduced
426 /// due to writeback. This function only works on Thumb1.
427 void
428 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
430  DebugLoc DL, unsigned Base,
431  unsigned WordOffset,
432  ARMCC::CondCodes Pred, unsigned PredReg) {
433  assert(isThumb1 && "Can only update base register uses for Thumb1!");
434  // Start updating any instructions with immediate offsets. Insert a SUB before
435  // the first non-updateable instruction (if any).
436  for (; MBBI != MBB.end(); ++MBBI) {
437  bool InsertSub = false;
438  unsigned Opc = MBBI->getOpcode();
439 
440  if (MBBI->readsRegister(Base)) {
441  int Offset;
442  bool IsLoad =
443  Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
444  bool IsStore =
445  Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
446 
447  if (IsLoad || IsStore) {
448  // Loads and stores with immediate offsets can be updated, but only if
449  // the new offset isn't negative.
450  // The MachineOperand containing the offset immediate is the last one
451  // before predicates.
452  MachineOperand &MO =
453  MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
454  // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
455  Offset = MO.getImm() - WordOffset * getImmScale(Opc);
456 
457  // If storing the base register, it needs to be reset first.
458  unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
459 
460  if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
461  MO.setImm(Offset);
462  else
463  InsertSub = true;
464 
465  } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
466  !definesCPSR(MBBI)) {
467  // SUBS/ADDS using this register, with a dead def of the CPSR.
468  // Merge it with the update; if the merged offset is too large,
469  // insert a new sub instead.
470  MachineOperand &MO =
471  MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
472  Offset = (Opc == ARM::tSUBi8) ?
473  MO.getImm() + WordOffset * 4 :
474  MO.getImm() - WordOffset * 4 ;
475  if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
476  // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
477  // Offset == 0.
478  MO.setImm(Offset);
479  // The base register has now been reset, so exit early.
480  return;
481  } else {
482  InsertSub = true;
483  }
484 
485  } else {
486  // Can't update the instruction.
487  InsertSub = true;
488  }
489 
490  } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
491  // Since SUBS sets the condition flags, we can't place the base reset
492  // after an instruction that has a live CPSR def.
493  // The base register might also contain an argument for a function call.
494  InsertSub = true;
495  }
496 
497  if (InsertSub) {
498  // An instruction above couldn't be updated, so insert a sub.
499  AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
500  .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
501  return;
502  }
503 
504  if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
505  // Register got killed. Stop updating.
506  return;
507  }
508 
509  // End of block was reached.
510  if (MBB.succ_size() > 0) {
511  // FIXME: Because of a bug, live registers are sometimes missing from
512  // the successor blocks' live-in sets. This means we can't trust that
513  // information and *always* have to reset at the end of a block.
514  // See PR21029.
515  if (MBBI != MBB.end()) --MBBI;
517  BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
518  .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
519  }
520 }
521 
522 /// Return the first register of class \p RegClass that is not in \p Regs.
523 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
524  if (!RegClassInfoValid) {
525  RegClassInfo.runOnMachineFunction(*MF);
526  RegClassInfoValid = true;
527  }
528 
529  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
530  if (!LiveRegs.contains(Reg))
531  return Reg;
532  return 0;
533 }
534 
535 /// Compute live registers just before instruction \p Before (in normal schedule
536 /// direction). Computes backwards so multiple queries in the same block must
537 /// come in reverse order.
538 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
540  // Initialize if we never queried in this block.
541  if (!LiveRegsValid) {
542  LiveRegs.init(TRI);
543  LiveRegs.addLiveOuts(&MBB, true);
544  LiveRegPos = MBB.end();
545  LiveRegsValid = true;
546  }
547  // Move backward just before the "Before" position.
548  while (LiveRegPos != Before) {
549  --LiveRegPos;
550  LiveRegs.stepBackward(*LiveRegPos);
551  }
552 }
553 
554 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
555  unsigned Reg) {
556  for (const std::pair<unsigned, bool> &R : Regs)
557  if (R.first == Reg)
558  return true;
559  return false;
560 }
561 
562 /// Create and insert a LDM or STM with Base as base register and registers in
563 /// Regs as the register operands that would be loaded / stored. It returns
564 /// true if the transformation is done.
565 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
566  MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
567  bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
568  DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
569  unsigned NumRegs = Regs.size();
570  assert(NumRegs > 1);
571 
572  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
573  // Compute liveness information for that register to make the decision.
574  bool SafeToClobberCPSR = !isThumb1 ||
575  (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
577 
578  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
579 
580  // Exception: If the base register is in the input reglist, Thumb1 LDM is
581  // non-writeback.
582  // It's also not possible to merge an STR of the base register in Thumb1.
583  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
584  assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
585  if (Opcode == ARM::tLDRi) {
586  Writeback = false;
587  } else if (Opcode == ARM::tSTRi) {
588  return nullptr;
589  }
590  }
591 
593  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
594  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
595  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
596 
597  if (Offset == 4 && haveIBAndDA) {
598  Mode = ARM_AM::ib;
599  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
600  Mode = ARM_AM::da;
601  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
602  // VLDM/VSTM do not support DB mode without also updating the base reg.
603  Mode = ARM_AM::db;
604  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
605  // Check if this is a supported opcode before inserting instructions to
606  // calculate a new base register.
607  if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
608 
609  // If starting offset isn't zero, insert a MI to materialize a new base.
610  // But only do so if it is cost effective, i.e. merging more than two
611  // loads / stores.
612  if (NumRegs <= 2)
613  return nullptr;
614 
615  // On Thumb1, it's not worth materializing a new base register without
616  // clobbering the CPSR (i.e. not using ADDS/SUBS).
617  if (!SafeToClobberCPSR)
618  return nullptr;
619 
620  unsigned NewBase;
621  if (isi32Load(Opcode)) {
622  // If it is a load, then just use one of the destination register to
623  // use as the new base.
624  NewBase = Regs[NumRegs-1].first;
625  } else {
626  // Find a free register that we can use as scratch register.
627  moveLiveRegsBefore(MBB, InsertBefore);
628  // The merged instruction does not exist yet but will use several Regs if
629  // it is a Store.
630  if (!isLoadSingle(Opcode))
631  for (const std::pair<unsigned, bool> &R : Regs)
632  LiveRegs.addReg(R.first);
633 
634  NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
635  if (NewBase == 0)
636  return nullptr;
637  }
638 
639  int BaseOpc =
640  isThumb2 ? ARM::t2ADDri :
641  (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
642  (isThumb1 && Offset < 8) ? ARM::tADDi3 :
643  isThumb1 ? ARM::tADDi8 : ARM::ADDri;
644 
645  if (Offset < 0) {
646  Offset = - Offset;
647  BaseOpc =
648  isThumb2 ? ARM::t2SUBri :
649  (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
650  isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
651  }
652 
653  if (!TL->isLegalAddImmediate(Offset))
654  // FIXME: Try add with register operand?
655  return nullptr; // Probably not worth it then.
656 
657  // We can only append a kill flag to the add/sub input if the value is not
658  // used in the register list of the stm as well.
659  bool KillOldBase = BaseKill &&
660  (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
661 
662  if (isThumb1) {
663  // Thumb1: depending on immediate size, use either
664  // ADDS NewBase, Base, #imm3
665  // or
666  // MOV NewBase, Base
667  // ADDS NewBase, #imm8.
668  if (Base != NewBase &&
669  (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
670  // Need to insert a MOV to the new base first.
671  if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
672  !STI->hasV6Ops()) {
673  // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
674  if (Pred != ARMCC::AL)
675  return nullptr;
676  BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
677  .addReg(Base, getKillRegState(KillOldBase));
678  } else
679  BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
680  .addReg(Base, getKillRegState(KillOldBase))
681  .addImm(Pred).addReg(PredReg);
682 
683  // The following ADDS/SUBS becomes an update.
684  Base = NewBase;
685  KillOldBase = true;
686  }
687  if (BaseOpc == ARM::tADDrSPi) {
688  assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
689  BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
690  .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
691  .addImm(Pred).addReg(PredReg);
692  } else
694  BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
695  .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
696  .addImm(Pred).addReg(PredReg);
697  } else {
698  BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
699  .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
700  .addImm(Pred).addReg(PredReg).addReg(0);
701  }
702  Base = NewBase;
703  BaseKill = true; // New base is always killed straight away.
704  }
705 
706  bool isDef = isLoadSingle(Opcode);
707 
708  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
709  // base register writeback.
710  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
711  if (!Opcode)
712  return nullptr;
713 
714  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
715  // - There is no writeback (LDM of base register),
716  // - the base register is killed by the merged instruction,
717  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
718  // to reset the base register.
719  // Otherwise, don't merge.
720  // It's safe to return here since the code to materialize a new base register
721  // above is also conditional on SafeToClobberCPSR.
722  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
723  return nullptr;
724 
726 
727  if (Writeback) {
728  if (Opcode == ARM::tLDMIA)
729  // Update tLDMIA with writeback if necessary.
730  Opcode = ARM::tLDMIA_UPD;
731 
732  MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
733 
734  // Thumb1: we might need to set base writeback when building the MI.
735  MIB.addReg(Base, getDefRegState(true))
736  .addReg(Base, getKillRegState(BaseKill));
737 
738  // The base isn't dead after a merged instruction with writeback.
739  // Insert a sub instruction after the newly formed instruction to reset.
740  if (!BaseKill)
741  UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
742 
743  } else {
744  // No writeback, simply build the MachineInstr.
745  MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
746  MIB.addReg(Base, getKillRegState(BaseKill));
747  }
748 
749  MIB.addImm(Pred).addReg(PredReg);
750 
751  for (const std::pair<unsigned, bool> &R : Regs)
752  MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
753 
754  return MIB.getInstr();
755 }
756 
757 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
758  MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
759  bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
760  DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
761  bool IsLoad = isi32Load(Opcode);
762  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
763  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
764 
765  assert(Regs.size() == 2);
766  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
767  TII->get(LoadStoreOpcode));
768  if (IsLoad) {
769  MIB.addReg(Regs[0].first, RegState::Define)
770  .addReg(Regs[1].first, RegState::Define);
771  } else {
772  MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
773  .addReg(Regs[1].first, getKillRegState(Regs[1].second));
774  }
775  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
776  return MIB.getInstr();
777 }
778 
779 /// Call MergeOps and update MemOps and merges accordingly on success.
780 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
781  const MachineInstr *First = Cand.Instrs.front();
782  unsigned Opcode = First->getOpcode();
783  bool IsLoad = isLoadSingle(Opcode);
785  SmallVector<unsigned, 4> ImpDefs;
786  DenseSet<unsigned> KilledRegs;
787  // Determine list of registers and list of implicit super-register defs.
788  for (const MachineInstr *MI : Cand.Instrs) {
789  const MachineOperand &MO = getLoadStoreRegOp(*MI);
790  unsigned Reg = MO.getReg();
791  bool IsKill = MO.isKill();
792  if (IsKill)
793  KilledRegs.insert(Reg);
794  Regs.push_back(std::make_pair(Reg, IsKill));
795 
796  if (IsLoad) {
797  // Collect any implicit defs of super-registers, after merging we can't
798  // be sure anymore that we properly preserved these live ranges and must
799  // removed these implicit operands.
800  for (const MachineOperand &MO : MI->implicit_operands()) {
801  if (!MO.isReg() || !MO.isDef() || MO.isDead())
802  continue;
803  assert(MO.isImplicit());
804  unsigned DefReg = MO.getReg();
805 
806  if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
807  continue;
808  // We can ignore cases where the super-reg is read and written.
809  if (MI->readsRegister(DefReg))
810  continue;
811  ImpDefs.push_back(DefReg);
812  }
813  }
814  }
815 
816  // Attempt the merge.
817  typedef MachineBasicBlock::iterator iterator;
818  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
819  iterator InsertBefore = std::next(iterator(LatestMI));
820  MachineBasicBlock &MBB = *LatestMI->getParent();
821  unsigned Offset = getMemoryOpOffset(First);
822  unsigned Base = getLoadStoreBaseOp(*First).getReg();
823  bool BaseKill = LatestMI->killsRegister(Base);
824  unsigned PredReg = 0;
825  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
826  DebugLoc DL = First->getDebugLoc();
827  MachineInstr *Merged = nullptr;
828  if (Cand.CanMergeToLSDouble)
829  Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
830  Opcode, Pred, PredReg, DL, Regs);
831  if (!Merged && Cand.CanMergeToLSMulti)
832  Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
833  Opcode, Pred, PredReg, DL, Regs);
834  if (!Merged)
835  return nullptr;
836 
837  // Determine earliest instruction that will get removed. We then keep an
838  // iterator just above it so the following erases don't invalidated it.
839  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
840  bool EarliestAtBegin = false;
841  if (EarliestI == MBB.begin()) {
842  EarliestAtBegin = true;
843  } else {
844  EarliestI = std::prev(EarliestI);
845  }
846 
847  // Remove instructions which have been merged.
848  for (MachineInstr *MI : Cand.Instrs)
849  MBB.erase(MI);
850 
851  // Determine range between the earliest removed instruction and the new one.
852  if (EarliestAtBegin)
853  EarliestI = MBB.begin();
854  else
855  EarliestI = std::next(EarliestI);
856  auto FixupRange = make_range(EarliestI, iterator(Merged));
857 
858  if (isLoadSingle(Opcode)) {
859  // If the previous loads defined a super-reg, then we have to mark earlier
860  // operands undef; Replicate the super-reg def on the merged instruction.
861  for (MachineInstr &MI : FixupRange) {
862  for (unsigned &ImpDefReg : ImpDefs) {
863  for (MachineOperand &MO : MI.implicit_operands()) {
864  if (!MO.isReg() || MO.getReg() != ImpDefReg)
865  continue;
866  if (MO.readsReg())
867  MO.setIsUndef();
868  else if (MO.isDef())
869  ImpDefReg = 0;
870  }
871  }
872  }
873 
874  MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
875  for (unsigned ImpDef : ImpDefs)
876  MIB.addReg(ImpDef, RegState::ImplicitDefine);
877  } else {
878  // Remove kill flags: We are possibly storing the values later now.
879  assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
880  for (MachineInstr &MI : FixupRange) {
881  for (MachineOperand &MO : MI.uses()) {
882  if (!MO.isReg() || !MO.isKill())
883  continue;
884  if (KilledRegs.count(MO.getReg()))
885  MO.setIsKill(false);
886  }
887  }
888  assert(ImpDefs.empty());
889  }
890 
891  return Merged;
892 }
893 
/// Returns true when \p Offset can be encoded in a t2LDRDi8/t2STRDi8: the
/// encoding holds an 8-bit immediate internally multiplied by 4, so the
/// magnitude must be a multiple of 4 no greater than 1020.
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Magnitude = abs(Offset);
  return Magnitude < 1024 && (Magnitude % 4) == 0;
}
900 
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
/// MemOps is expected in ascending-offset order (enforced by the
/// Offset + Size check below); each maximal run of consecutive-offset entries
/// becomes one MergeCandidate, flagged with whether it may be turned into an
/// LDM/STM/VLDM/VSTM (CanMergeToLSMulti) and/or a Thumb2 LDRD/STRD pair
/// (CanMergeToLSDouble).
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    // Undef transfer registers cannot constrain ordering; treat them as the
    // largest possible encoding so any following register "ascends" past them.
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    // LDRD/STRD is Thumb2-only here and needs a multiple-of-4 offset < 1024.
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid vldm/vstm that start with an odd register number: that
    // needs more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      // The run ends at the first gap in the offsets.
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store:
      // an LSDouble candidate holds at most two operations (Count <= 1 means
      // this would be the second).
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    // A single operation has nothing to merge with.
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
997 
998 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
999  unsigned Bytes, unsigned Limit,
1000  ARMCC::CondCodes Pred, unsigned PredReg) {
1001  unsigned MyPredReg = 0;
1002  if (!MI)
1003  return false;
1004 
1005  bool CheckCPSRDef = false;
1006  switch (MI->getOpcode()) {
1007  default: return false;
1008  case ARM::tSUBi8:
1009  case ARM::t2SUBri:
1010  case ARM::SUBri:
1011  CheckCPSRDef = true;
1012  break;
1013  case ARM::tSUBspi:
1014  break;
1015  }
1016 
1017  // Make sure the offset fits in 8 bits.
1018  if (Bytes == 0 || (Limit && Bytes >= Limit))
1019  return false;
1020 
1021  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
1022  MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
1023  if (!(MI->getOperand(0).getReg() == Base &&
1024  MI->getOperand(1).getReg() == Base &&
1025  (MI->getOperand(2).getImm() * Scale) == Bytes &&
1026  getInstrPredicate(MI, MyPredReg) == Pred &&
1027  MyPredReg == PredReg))
1028  return false;
1029 
1030  return CheckCPSRDef ? !definesCPSR(MI) : true;
1031 }
1032 
1033 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
1034  unsigned Bytes, unsigned Limit,
1035  ARMCC::CondCodes Pred, unsigned PredReg) {
1036  unsigned MyPredReg = 0;
1037  if (!MI)
1038  return false;
1039 
1040  bool CheckCPSRDef = false;
1041  switch (MI->getOpcode()) {
1042  default: return false;
1043  case ARM::tADDi8:
1044  case ARM::t2ADDri:
1045  case ARM::ADDri:
1046  CheckCPSRDef = true;
1047  break;
1048  case ARM::tADDspi:
1049  break;
1050  }
1051 
1052  if (Bytes == 0 || (Limit && Bytes >= Limit))
1053  // Make sure the offset fits in 8 bits.
1054  return false;
1055 
1056  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
1057  MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
1058  if (!(MI->getOperand(0).getReg() == Base &&
1059  MI->getOperand(1).getReg() == Base &&
1060  (MI->getOperand(2).getImm() * Scale) == Bytes &&
1061  getInstrPredicate(MI, MyPredReg) == Pred &&
1062  MyPredReg == PredReg))
1063  return false;
1064 
1065  return CheckCPSRDef ? !definesCPSR(MI) : true;
1066 }
1067 
/// Map a non-writeback load/store-multiple opcode to its base-updating
/// ("_UPD") counterpart for the requested addressing submode.
/// ARM-mode LDM/STM support all four submodes; the Thumb2 and VFP multiples
/// only exist as IA/DB, so any other pairing is a caller error and hits
/// llvm_unreachable.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
1134 
/// Fold proceeding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
///
/// Returns true if MI was replaced by a writeback form (and the matching
/// add/sub erased).
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  // NOTE(review): 'Mode' (the LDM/STM addressing submode of Opcode) is used
  // and reassigned below, but its initialization is not visible in this
  // excerpt — verify it is derived from Opcode before this point.

  // Try merging with the previous instruction: a decrement before an
  // increment-style multiple flips the submode (ia->db, ib->da) so the
  // writeback form covers both the update and the access.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    // Skip over debug values so they don't block the match.
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction; a following update matching the
  // access direction keeps the submode unchanged.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(NextMBBI);
  }

  if (!DoMerge)
    return false;

  // Build the writeback form: defined base, source base, predicate, then the
  // original transfer-register list copied over.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands (the register list follows the base and
  // predicate operands of the original instruction).
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
1226 
1227 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1228  ARM_AM::AddrOpc Mode) {
1229  switch (Opc) {
1230  case ARM::LDRi12:
1231  return ARM::LDR_PRE_IMM;
1232  case ARM::STRi12:
1233  return ARM::STR_PRE_IMM;
1234  case ARM::VLDRS:
1235  return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1236  case ARM::VLDRD:
1237  return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1238  case ARM::VSTRS:
1239  return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1240  case ARM::VSTRD:
1241  return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1242  case ARM::t2LDRi8:
1243  case ARM::t2LDRi12:
1244  return ARM::t2LDR_PRE;
1245  case ARM::t2STRi8:
1246  case ARM::t2STRi12:
1247  return ARM::t2STR_PRE;
1248  default: llvm_unreachable("Unhandled opcode!");
1249  }
1250 }
1251 
1252 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1253  ARM_AM::AddrOpc Mode) {
1254  switch (Opc) {
1255  case ARM::LDRi12:
1256  return ARM::LDR_POST_IMM;
1257  case ARM::STRi12:
1258  return ARM::STR_POST_IMM;
1259  case ARM::VLDRS:
1260  return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1261  case ARM::VLDRD:
1262  return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1263  case ARM::VSTRS:
1264  return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1265  case ARM::VSTRD:
1266  return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1267  case ARM::t2LDRi8:
1268  case ARM::t2LDRi12:
1269  return ARM::t2LDR_POST;
1270  case ARM::t2STRi8:
1271  case ARM::t2STRi12:
1272  return ARM::t2STR_POST;
1273  default: llvm_unreachable("Unhandled opcode!");
1274  }
1275 }
1276 
/// Fold proceeding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
///
/// A preceding matching SUB/ADD of the base becomes a pre-indexed access and
/// a trailing one becomes a post-indexed access; the add/sub instruction is
/// erased. Returns true if MI was replaced.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only a zero immediate offset can be folded into a pre/post-indexed form.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isLoadSingle(Opcode);
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // Immediate range of the update we can absorb:
  // AM2 - 12 bits, thumb2 - 8 bits. AM5 (VFP) has no pre/post form, so no
  // range check applies (Limit == 0).
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction (becomes pre-indexed).
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    // Skip over debug values so they don't block the match.
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction (becomes post-indexed).
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // Post-indexed AM2 forms take an am2offset-encoded immediate plus a
        // (vestigial) zero offset register.
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
1411 
1412 /// Returns true if instruction is a memory operation that this pass is capable
1413 /// of operating on.
1414 static bool isMemoryOp(const MachineInstr *MI) {
1415  // When no memory operands are present, conservatively assume unaligned,
1416  // volatile, unfoldable.
1417  if (!MI->hasOneMemOperand())
1418  return false;
1419 
1420  const MachineMemOperand *MMO = *MI->memoperands_begin();
1421 
1422  // Don't touch volatile memory accesses - we may be changing their order.
1423  if (MMO->isVolatile())
1424  return false;
1425 
1426  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1427  // not.
1428  if (MMO->getAlignment() < 4)
1429  return false;
1430 
1431  // str <undef> could probably be eliminated entirely, but for now we just want
1432  // to avoid making a mess of it.
1433  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1434  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1435  MI->getOperand(0).isUndef())
1436  return false;
1437 
1438  // Likewise don't mess with references to undefined addresses.
1439  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1440  MI->getOperand(1).isUndef())
1441  return false;
1442 
1443  unsigned Opcode = MI->getOpcode();
1444  switch (Opcode) {
1445  default: break;
1446  case ARM::VLDRS:
1447  case ARM::VSTRS:
1448  return MI->getOperand(1).isReg();
1449  case ARM::VLDRD:
1450  case ARM::VSTRD:
1451  return MI->getOperand(1).isReg();
1452  case ARM::LDRi12:
1453  case ARM::STRi12:
1454  case ARM::tLDRi:
1455  case ARM::tSTRi:
1456  case ARM::tLDRspi:
1457  case ARM::tSTRspi:
1458  case ARM::t2LDRi8:
1459  case ARM::t2LDRi12:
1460  case ARM::t2STRi8:
1461  case ARM::t2STRi12:
1462  return MI->getOperand(1).isReg();
1463  }
1464  return false;
1465 }
1466 
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  // Emit a single load (isDef) or store of Reg from/to [BaseReg + Offset]
  // with the given predicate, preserving the caller-supplied dead/kill/undef
  // flags. NOTE(review): the opening line of this signature (the block and
  // insertion-iterator parameters) is not visible in this excerpt.
  // NOTE(review): the DL parameter is unused — the DebugLoc is taken from
  // MBBI instead; confirm that is intended.
  if (isDef) {
    // Load: Reg is defined (and possibly dead), BaseReg is read.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    // Store: Reg is read (possibly killed/undef), BaseReg is read.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
1490 
/// Fix up an LDRD/STRD whose register pair the target cannot use: either the
/// Cortex-M3 errata 602117 case (LDRD whose base is in the destination pair)
/// or an ARM-mode LDRD/STRD with a non-consecutive / misaligned register
/// pair. The instruction is rewritten as an LDM/STM or split into two single
/// loads/stores. Returns true if MI was erased.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
  // NOTE(review): the iterator parameter line (MBBI) of this signature is
  // not visible in this excerpt.
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill  = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  // ARM-mode LDRD/STRD carry an extra offset operand; t2 forms do not.
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      // Load the odd (non-overlapping) half first so the base stays valid.
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}
1609 
/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
///
/// Walks the block bottom-up collecting chains of same-base/same-opcode/
/// same-predicate memory operations, turns each chain into MergeCandidates
/// via FormCandidates, then merges the candidates and folds base updates.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  LiveRegsValid = false;

  // Iterate in reverse: Position therefore counts from the end of the block.
  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; The op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue())
      // Debug values neither break nor join a chain.
      continue;

    // If we are here then the chain is broken; Extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  // Flush the chain still open at the top of the block.
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; This is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        // The multi-op merge failed; still try folding a base update into
        // each individual load/store.
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();

  return Changed;
}
1747 
/// If this is an exit BB, try merging the return ops ("bx lr" and
/// "mov pc, lr") into the preceding stack restore so it directly restores
/// the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  // NOTE(review): MBBI's initialization (presumably the block's last
  // non-debug instruction) is not visible in this excerpt — verify upstream.
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // Only merge when LR is the last register in the pop's list.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Retarget the pop of LR to PC, making the load itself the return, and
      // delete the now-redundant return instruction.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
1788 
1789 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1790  MF = &Fn;
1791  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1792  TL = STI->getTargetLowering();
1793  AFI = Fn.getInfo<ARMFunctionInfo>();
1794  TII = STI->getInstrInfo();
1795  TRI = STI->getRegisterInfo();
1796  MRI = &Fn.getRegInfo();
1797  RegClassInfoValid = false;
1798  isThumb2 = AFI->isThumb2Function();
1799  isThumb1 = AFI->isThumbFunction() && !isThumb2;
1800 
1801  bool Modified = false;
1802  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1803  ++MFI) {
1804  MachineBasicBlock &MBB = *MFI;
1805  Modified |= LoadStoreMultipleOpti(MBB);
1806  if (STI->hasV5TOps())
1807  Modified |= MergeReturnIntoLDM(MBB);
1808  }
1809 
1810  Allocator.DestroyAll();
1811  return Modified;
1812 }
1813 
1814 namespace {
1815  /// Pre- register allocation pass that moves loads / stores from consecutive
1816  /// locations close to make it more likely they will be combined later.
1817  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1818  static char ID;
1819  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1820 
// Cached per-function target objects, set up in runOnMachineFunction.
1821  const DataLayout *TD;
1822  const TargetInstrInfo *TII;
1823  const TargetRegisterInfo *TRI;
1824  const ARMSubtarget *STI;
1825  MachineRegisterInfo *MRI;
1826  MachineFunction *MF;
1827 
1828  bool runOnMachineFunction(MachineFunction &Fn) override;
1829 
1830  const char *getPassName() const override {
1831  return "ARM pre- register allocation load / store optimization pass";
1832  }
1833 
1834  private:
// Checks whether two single loads/stores can be merged into LDRD/STRD;
// fills in the out-parameters describing the merged instruction.
1835  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1836  unsigned &NewOpc, unsigned &EvenReg,
1837  unsigned &OddReg, unsigned &BaseReg,
1838  int &Offset,
1839  unsigned &PredReg, ARMCC::CondCodes &Pred,
1840  bool &isT2);
1841  bool RescheduleOps(MachineBasicBlock *MBB,
// NOTE(review): lines 1842 and 1844 of this declaration are missing from the
// listing. Per the definition below, the full parameter list presumably also
// takes SmallVectorImpl<MachineInstr *> &Ops and a
// DenseMap<MachineInstr*, unsigned> &MI2LocMap -- confirm against the source.
1843  unsigned Base, bool isLd,
1845  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1846  };
1847  char ARMPreAllocLoadStoreOpt::ID = 0;
1848 }
1849 
1850 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1851  TD = Fn.getTarget().getDataLayout();
1852  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1853  TII = STI->getInstrInfo();
1854  TRI = STI->getRegisterInfo();
1855  MRI = &Fn.getRegInfo();
1856  MF = &Fn;
1857 
1858  bool Modified = false;
1859  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1860  ++MFI)
1861  Modified |= RescheduleLoadStoreInstrs(MFI);
1862 
1863  return Modified;
1864 }
1865 
// Decide whether the loads/stores in MemOps can be moved next to each other
// without changing behavior, and whether doing so is worth the register
// pressure. Scans the instructions strictly between I and E.
1866 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
// NOTE(review): lines 1867-1869 of the parameter list are missing from this
// listing. The cross-reference index gives the full signature as taking
// MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, and
// SmallPtrSetImpl<MachineInstr *> &MemOps before the parameters below.
1870  SmallSet<unsigned, 4> &MemRegs,
1871  const TargetRegisterInfo *TRI) {
1872  // Are there stores / loads / calls between them?
1873  // FIXME: This is overly conservative. We should make use of alias information
1874  // some day.
1875  SmallSet<unsigned, 4> AddedRegPressure;
1876  while (++I != E) {
// Skip debug info and the memory ops being moved themselves.
1877  if (I->isDebugValue() || MemOps.count(&*I))
1878  continue;
// Calls, terminators and side-effecting instructions are hard barriers.
1879  if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1880  return false;
// Moving loads across an intervening store may reorder aliasing accesses.
1881  if (isLd && I->mayStore())
1882  return false;
1883  if (!isLd) {
1884  if (I->mayLoad())
1885  return false;
1886  // It's not safe to move the first 'str' down.
1887  // str r1, [r0]
1888  // strh r5, [r0]
1889  // str r4, [r0, #+4]
1890  if (I->mayStore())
1891  return false;
1892  }
1893  for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1894  MachineOperand &MO = I->getOperand(j);
1895  if (!MO.isReg())
1896  continue;
1897  unsigned Reg = MO.getReg();
// Any redefinition of (or overlap with) the base register kills the move.
1898  if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1899  return false;
// Registers live across the moved region add to register pressure.
1900  if (Reg != Base && !MemRegs.count(Reg))
1901  AddedRegPressure.insert(Reg);
1902  }
1903  }
1904 
1905  // Estimate register pressure increase due to the transformation.
1906  if (MemRegs.size() <= 4)
1907  // Ok if we are moving small number of instructions.
1908  return true;
1909  return AddedRegPressure.size() <= MemRegs.size() * 2;
1910 }
1911 
1912 
1913 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
// NOTE(review): line 1914 (the start of the signature) is missing here; per
// the cross-reference index the function is
// static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1915  MachineInstr *Op1) {
1916  assert(MI->memoperands_empty() && "expected a new machineinstr");
// Total number of memory operands across both source instructions.
1917  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1918  + (Op1->memoperands_end() - Op1->memoperands_begin());
1919 
// Allocate the combined array out of the function's allocator.
1920  MachineFunction *MF = MI->getParent()->getParent();
1921  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
// NOTE(review): line 1922 (presumably "MachineSDNode::mmo_iterator MemEnd =")
// is missing from this listing -- confirm against the full source.
1923  std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1924  MemEnd =
1925  std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1926  MI->setMemRefs(MemBegin, MemEnd);
1927 }
1928 
1929 bool
1930 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1931  DebugLoc &dl, unsigned &NewOpc,
1932  unsigned &FirstReg,
1933  unsigned &SecondReg,
1934  unsigned &BaseReg, int &Offset,
1935  unsigned &PredReg,
1936  ARMCC::CondCodes &Pred,
1937  bool &isT2) {
1938  // Make sure we're allowed to generate LDRD/STRD.
1939  if (!STI->hasV5TEOps())
1940  return false;
1941 
1942  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1943  unsigned Scale = 1;
1944  unsigned Opcode = Op0->getOpcode();
1945  if (Opcode == ARM::LDRi12) {
1946  NewOpc = ARM::LDRD;
1947  } else if (Opcode == ARM::STRi12) {
1948  NewOpc = ARM::STRD;
1949  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1950  NewOpc = ARM::t2LDRDi8;
1951  Scale = 4;
1952  isT2 = true;
1953  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1954  NewOpc = ARM::t2STRDi8;
1955  Scale = 4;
1956  isT2 = true;
1957  } else {
1958  return false;
1959  }
1960 
1961  // Make sure the base address satisfies i64 ld / st alignment requirement.
1962  // At the moment, we ignore the memoryoperand's value.
1963  // If we want to use AliasAnalysis, we should check it accordingly.
1964  if (!Op0->hasOneMemOperand() ||
1965  (*Op0->memoperands_begin())->isVolatile())
1966  return false;
1967 
1968  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1969  const Function *Func = MF->getFunction();
1970  unsigned ReqAlign = STI->hasV6Ops()
1971  ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1972  : 8; // Pre-v6 need 8-byte align
1973  if (Align < ReqAlign)
1974  return false;
1975 
1976  // Then make sure the immediate offset fits.
1977  int OffImm = getMemoryOpOffset(Op0);
1978  if (isT2) {
1979  int Limit = (1 << 8) * Scale;
1980  if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1981  return false;
1982  Offset = OffImm;
1983  } else {
1984  ARM_AM::AddrOpc AddSub = ARM_AM::add;
1985  if (OffImm < 0) {
1986  AddSub = ARM_AM::sub;
1987  OffImm = - OffImm;
1988  }
1989  int Limit = (1 << 8) * Scale;
1990  if (OffImm >= Limit || (OffImm & (Scale-1)))
1991  return false;
1992  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1993  }
1994  FirstReg = Op0->getOperand(0).getReg();
1995  SecondReg = Op1->getOperand(0).getReg();
1996  if (FirstReg == SecondReg)
1997  return false;
1998  BaseReg = Op0->getOperand(1).getReg();
1999  Pred = getInstrPredicate(Op0, PredReg);
2000  dl = Op0->getDebugLoc();
2001  return true;
2002 }
2003 
// Move the loads/stores in Ops (all sharing base register Base) next to each
// other so later passes can combine them; may also directly form LDRD/STRD
// for a suitable pair. Returns true if anything was moved or rewritten.
2004 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
// NOTE(review): line 2005 (presumably the SmallVectorImpl<MachineInstr *>
// &Ops parameter, matching the callers below) is missing from this listing.
2006  unsigned Base, bool isLd,
2007  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2008  bool RetVal = false;
2009 
2010  // Sort by offset (in reverse order).
2011  std::sort(Ops.begin(), Ops.end(),
2012  [](const MachineInstr *LHS, const MachineInstr *RHS) {
2013  int LOffset = getMemoryOpOffset(LHS);
2014  int ROffset = getMemoryOpOffset(RHS);
2015  assert(LHS == RHS || LOffset != ROffset);
2016  return LOffset > ROffset;
2017  });
2018 
2019  // The loads / stores of the same base are in order. Scan them from first to
2020  // last and check for the following:
2021  // 1. Any def of base.
2022  // 2. Any gaps.
2023  while (Ops.size() > 1) {
2024  unsigned FirstLoc = ~0U;
2025  unsigned LastLoc = 0;
2026  MachineInstr *FirstOp = nullptr;
2027  MachineInstr *LastOp = nullptr;
2028  int LastOffset = 0;
2029  unsigned LastOpcode = 0;
2030  unsigned LastBytes = 0;
2031  unsigned NumMove = 0;
// Walk Ops from the lowest offset (end of the sorted vector) upward,
// collecting a maximal run of same-kind, gap-free accesses.
2032  for (int i = Ops.size() - 1; i >= 0; --i) {
2033  MachineInstr *Op = Ops[i];
2034  unsigned Loc = MI2LocMap[Op];
2035  if (Loc <= FirstLoc) {
2036  FirstLoc = Loc;
2037  FirstOp = Op;
2038  }
2039  if (Loc >= LastLoc) {
2040  LastLoc = Loc;
2041  LastOp = Op;
2042  }
2043 
2044  unsigned LSMOpcode
// NOTE(review): line 2045 (the initializer of LSMOpcode, presumably a call
// to getLoadStoreMultipleOpcode) is missing from this listing -- confirm.
2046  if (LastOpcode && LSMOpcode != LastOpcode)
2047  break;
2048 
// The run must be contiguous: uniform access size and exactly adjacent
// offsets; stop at the first gap.
2049  int Offset = getMemoryOpOffset(Op);
2050  unsigned Bytes = getLSMultipleTransferSize(Op);
2051  if (LastBytes) {
2052  if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2053  break;
2054  }
2055  LastOffset = Offset;
2056  LastBytes = Bytes;
2057  LastOpcode = LSMOpcode;
2058  if (++NumMove == 8) // FIXME: Tune this limit.
2059  break;
2060  }
2061 
2062  if (NumMove <= 1)
2063  Ops.pop_back();
2064  else {
// NOTE(review): line 2065 (the declaration of MemOps, a SmallPtrSetImpl-
// compatible set of MachineInstr* per IsSafeAndProfitableToMove's
// signature) is missing from this listing.
2066  SmallSet<unsigned, 4> MemRegs;
2067  for (int i = NumMove-1; i >= 0; --i) {
2068  MemOps.insert(Ops[i]);
2069  MemRegs.insert(Ops[i]->getOperand(0).getReg());
2070  }
2071 
2072  // Be conservative, if the instructions are too far apart, don't
2073  // move them. We want to limit the increase of register pressure.
2074  bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2075  if (DoMove)
2076  DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2077  MemOps, MemRegs, TRI);
2078  if (!DoMove) {
2079  for (unsigned i = 0; i != NumMove; ++i)
2080  Ops.pop_back();
2081  } else {
2082  // This is the new location for the loads / stores.
2083  MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2084  while (InsertPos != MBB->end()
2085  && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2086  ++InsertPos;
2087 
2088  // If we are moving a pair of loads / stores, see if it makes sense
2089  // to try to allocate a pair of registers that can form register pairs.
2090  MachineInstr *Op0 = Ops.back();
2091  MachineInstr *Op1 = Ops[Ops.size()-2];
2092  unsigned FirstReg = 0, SecondReg = 0;
2093  unsigned BaseReg = 0, PredReg = 0;
2094  ARMCC::CondCodes Pred = ARMCC::AL;
2095  bool isT2 = false;
2096  unsigned NewOpc = 0;
2097  int Offset = 0;
2098  DebugLoc dl;
2099  if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2100  FirstReg, SecondReg, BaseReg,
2101  Offset, PredReg, Pred, isT2)) {
2102  Ops.pop_back();
2103  Ops.pop_back();
2104 
// Constrain both data registers to the class LDRD/STRD requires.
2105  const MCInstrDesc &MCID = TII->get(NewOpc);
2106  const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2107  MRI->constrainRegClass(FirstReg, TRC);
2108  MRI->constrainRegClass(SecondReg, TRC);
2109 
2110  // Form the pair instruction.
2111  if (isLd) {
2112  MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2113  .addReg(FirstReg, RegState::Define)
2114  .addReg(SecondReg, RegState::Define)
2115  .addReg(BaseReg);
2116  // FIXME: We're converting from LDRi12 to an insn that still
2117  // uses addrmode2, so we need an explicit offset reg. It should
2118  // always by reg0 since we're transforming LDRi12s.
2119  if (!isT2)
2120  MIB.addReg(0);
2121  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2122  concatenateMemOperands(MIB, Op0, Op1);
2123  DEBUG(dbgs() << "Formed " << *MIB << "\n");
2124  ++NumLDRDFormed;
2125  } else {
2126  MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2127  .addReg(FirstReg)
2128  .addReg(SecondReg)
2129  .addReg(BaseReg);
2130  // FIXME: We're converting from LDRi12 to an insn that still
2131  // uses addrmode2, so we need an explicit offset reg. It should
2132  // always by reg0 since we're transforming STRi12s.
2133  if (!isT2)
2134  MIB.addReg(0);
2135  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2136  concatenateMemOperands(MIB, Op0, Op1);
2137  DEBUG(dbgs() << "Formed " << *MIB << "\n");
2138  ++NumSTRDFormed;
2139  }
// The pair instruction replaces both originals.
2140  MBB->erase(Op0);
2141  MBB->erase(Op1);
2142 
2143  if (!isT2) {
2144  // Add register allocation hints to form register pairs.
2145  MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2146  MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
2147  }
2148  } else {
// Not forming LDRD/STRD: just move the run of instructions to InsertPos.
2149  for (unsigned i = 0; i != NumMove; ++i) {
2150  MachineInstr *Op = Ops.back();
2151  Ops.pop_back();
2152  MBB->splice(InsertPos, MBB, Op);
2153  }
2154  }
2155 
2156  NumLdStMoved += NumMove;
2157  RetVal = true;
2158  }
2159  }
2160  }
2161 
2162  return RetVal;
2163 }
2164 
// Scan the block region by region (regions end at calls/terminators),
// bucketing single loads and stores by base register, then hand each bucket
// with more than one access to RescheduleOps. Returns true if anything moved.
2165 bool
2166 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2167  bool RetVal = false;
2168 
// NOTE(review): lines 2169-2171 are missing from this listing. Based on the
// uses below they presumably declare MI2LocMap (DenseMap<MachineInstr*,
// unsigned>) and the Base2LdsMap / Base2StsMap base-to-instructions maps.
2172  SmallVector<unsigned, 4> LdBases;
2173  SmallVector<unsigned, 4> StBases;
2174 
2175  unsigned Loc = 0;
2176  MachineBasicBlock::iterator MBBI = MBB->begin();
2177  MachineBasicBlock::iterator E = MBB->end();
2178  while (MBBI != E) {
2179  for (; MBBI != E; ++MBBI) {
2180  MachineInstr *MI = MBBI;
2181  if (MI->isCall() || MI->isTerminator()) {
2182  // Stop at barriers.
2183  ++MBBI;
2184  break;
2185  }
2186 
// Number each real instruction so RescheduleOps can measure distances.
2187  if (!MI->isDebugValue())
2188  MI2LocMap[MI] = ++Loc;
2189 
2190  if (!isMemoryOp(MI))
2191  continue;
// Only unpredicated memory ops are candidates.
2192  unsigned PredReg = 0;
2193  if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2194  continue;
2195 
2196  int Opc = MI->getOpcode();
2197  bool isLd = isLoadSingle(Opc);
2198  unsigned Base = MI->getOperand(1).getReg();
2199  int Offset = getMemoryOpOffset(MI);
2200 
2201  bool StopHere = false;
2202  if (isLd) {
// NOTE(review): line 2203 (the declaration of the map iterator BI,
// initialized from the find() on the next line) is missing here.
2204  Base2LdsMap.find(Base);
2205  if (BI != Base2LdsMap.end()) {
// A second access to the same base+offset ends the region: rescheduling
// across it could reorder aliasing accesses.
2206  for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2207  if (Offset == getMemoryOpOffset(BI->second[i])) {
2208  StopHere = true;
2209  break;
2210  }
2211  }
2212  if (!StopHere)
2213  BI->second.push_back(MI);
2214  } else {
2215  Base2LdsMap[Base].push_back(MI);
2216  LdBases.push_back(Base);
2217  }
2218  } else {
// NOTE(review): line 2219 (the matching iterator declaration for the
// store map) is missing here as well.
2220  Base2StsMap.find(Base);
2221  if (BI != Base2StsMap.end()) {
2222  for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2223  if (Offset == getMemoryOpOffset(BI->second[i])) {
2224  StopHere = true;
2225  break;
2226  }
2227  }
2228  if (!StopHere)
2229  BI->second.push_back(MI);
2230  } else {
2231  Base2StsMap[Base].push_back(MI);
2232  StBases.push_back(Base);
2233  }
2234  }
2235 
2236  if (StopHere) {
2237  // Found a duplicate (a base+offset combination that's seen earlier).
2238  // Backtrack.
2239  --Loc;
2240  break;
2241  }
2242  }
2243 
2244  // Re-schedule loads.
2245  for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2246  unsigned Base = LdBases[i];
2247  SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2248  if (Lds.size() > 1)
2249  RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2250  }
2251 
2252  // Re-schedule stores.
2253  for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2254  unsigned Base = StBases[i];
2255  SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2256  if (Sts.size() > 1)
2257  RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
2258  }
2259 
// Reset per-region state before scanning the next region of the block.
2260  if (MBBI != E) {
2261  Base2LdsMap.clear();
2262  Base2StsMap.clear();
2263  LdBases.clear();
2264  StBases.clear();
2265  }
2266  }
2267 
2268  return RetVal;
2269 }
2270 
2271 
2272 /// Returns an instance of the load / store optimization pass.
// NOTE(review): line 2273 (the signature) is missing from this listing; per
// the cross-reference index it is
// FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false).
// PreAlloc selects the pre-RA rescheduling pass over the post-RA merger.
2274  if (PreAlloc)
2275  return new ARMPreAllocLoadStoreOpt();
2276  return new ARMLoadStoreOpt();
2277 }
bool isImplicit() const
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
unsigned succ_size() const
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:104
static bool isT1i32Load(unsigned Opc)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
static unsigned char getAM3Offset(unsigned AM3Opc)
STATISTIC(NumFunctions,"Total number of functions")
iterator_range< mop_iterator > uses()
Definition: MachineInstr.h:325
DenseSet - This implements a dense probed hash-table based set.
Definition: DenseSet.h:39
static unsigned getImmScale(unsigned Opc)
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:138
void setIsUndef(bool Val=true)
bool isDead() const
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:265
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:276
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, unsigned Bytes, unsigned Limit, ARMCC::CondCodes Pred, unsigned PredReg)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:264
A debug info location.
Definition: DebugLoc.h:34
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:295
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:419
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:242
MachineMemOperand - A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
bool isReg() const
isReg - Tests if this is a MO_Register operand.
static int getMemoryOpOffset(const MachineInstr *MI)
Reg
All possible values of the reg field in the ModR/M byte.
bool isUndef() const
Windows NT (Windows on ARM)
size_type size() const
Definition: SmallSet.h:48
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:271
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI)
const HexagonRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
bool isKill() const
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: ArrayRef.h:31
uint64_t getAlignment() const
getAlignment - Return the minimum known alignment in bytes of the actual memory reference.
iterator getLastNonDebugInstr()
getLastNonDebugInstr - returns an iterator to the last non-debug instruction in the basic block...
int64_t getImm() const
unsigned getUndefRegState(bool B)
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been <def>ined and not <kill>ed as of just before Before...
unsigned getKillRegState(bool B)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:120
TargetInstrInfo - Interface to description of machine instruction set.
bool isDebugValue() const
Definition: MachineInstr.h:748
unsigned getDeadRegState(bool B)
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:341
unsigned getDefRegState(bool B)
bundle_iterator< MachineInstr, instr_iterator > iterator
bool regsOverlap(unsigned regA, unsigned regB) const
regsOverlap - Returns true if the two registers are equal or alias each other.
static unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
size_type count(const ValueT &V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:65
ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition, otherwise returns AL.
static unsigned char getAM5Offset(unsigned AM5Opc)
static bool isT2i32Load(unsigned Opc)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:32
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:264
Register is known to be dead.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:352
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:69
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
void setIsKill(bool Val=true)
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:53
static bool isi32Store(unsigned Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
Definition: MachineInstr.h:836
bool memoperands_empty() const
Definition: MachineInstr.h:342
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:147
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:299
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
const DataLayout * getDataLayout() const
Deprecated in 3.7, will be removed in 3.8.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:349
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
static bool isMemoryOp(const MachineInstr *MI)
Returns true if instruction is a memory operation that this pass is capable of operating on...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
static AddrOpc getAM3Op(unsigned AM3Opc)
static unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
iterator_range< mop_iterator > implicit_operands()
Definition: MachineInstr.h:309
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:238
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:210
Representation of each machine instruction.
Definition: MachineInstr.h:51
static bool isT2i32Store(unsigned Opc)
bundle_iterator< const MachineInstr, const_instr_iterator > const_iterator
void copyImplicitOps(MachineFunction &MF, const MachineInstr *MI)
Copy implicit register operands from specified instruction to this instruction.
static bool definesCPSR(const MachineInstr *MI)
static bool isValidLSDoubleOffset(int Offset)
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
A set of live physical registers with functions to track liveness when walking backward/forward throu...
Definition: LivePhysRegs.h:43
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:54
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:403
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:651
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
static bool isLoadSingle(unsigned Opc)
static AddrOpc getAM5Op(unsigned AM5Opc)
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
unsigned getReg() const
getReg - Returns the register number.
static bool isi32Load(unsigned Opc)
bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
Definition: MachineInstr.h:857
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, unsigned Bytes, unsigned Limit, ARMCC::CondCodes Pred, unsigned PredReg)
LLVM Value Representation.
Definition: Value.h:69
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, MachineInstr *Op1)
Copy Op0 and Op1 operands into a new array assigned to MI.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:185
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
BasicBlockListType::iterator iterator
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
#define DEBUG(X)
Definition: Debug.h:92
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
static bool isVolatile(Instruction *Inst)
static bool isT1i32Store(unsigned Opc)
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineInstr's memory reference descriptor list.
static bool ContainsReg(const ArrayRef< std::pair< unsigned, bool >> &Regs, unsigned Reg)
static const MachineInstrBuilder & AddDefaultT1CC(const MachineInstrBuilder &MIB, bool isDead=false)
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:340
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, DebugLoc DL, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, bool OffKill, bool OffUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, bool isT2)
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly. ...