LLVM  10.0.0svn
ARMLoadStoreOptimizer.cpp
Go to the documentation of this file.
1 //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file contains a pass that performs load / store related peephole
10 /// optimizations. This pass should be run after register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARM.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMISelLowering.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
22 #include "Utils/ARMBaseInfo.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/DenseSet.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DebugLoc.h"
50 #include "llvm/IR/DerivedTypes.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/Type.h"
53 #include "llvm/MC/MCInstrDesc.h"
54 #include "llvm/Pass.h"
55 #include "llvm/Support/Allocator.h"
57 #include "llvm/Support/Debug.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstddef>
63 #include <cstdlib>
64 #include <iterator>
65 #include <limits>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define DEBUG_TYPE "arm-ldst-opt"
71 
72 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
73 STATISTIC(NumSTMGened , "Number of stm instructions generated");
74 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
75 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
76 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
77 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
78 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
79 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
80 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
81 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
82 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
83 
84 /// This switch disables formation of double/multi instructions that could
85 /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
86 /// disabled. This can be used to create libraries that are robust even when
87 /// users provoke undefined behaviour by supplying misaligned pointers.
88 /// \see mayCombineMisaligned()
89 static cl::opt<bool>
90 AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
91  cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
92 
93 #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
94 
95 namespace {
96 
97  /// Post-register-allocation pass that combines load / store instructions to
98  /// form ldm / stm instructions.
99  struct ARMLoadStoreOpt : public MachineFunctionPass {
100  static char ID;
101 
102  const MachineFunction *MF;
103  const TargetInstrInfo *TII;
104  const TargetRegisterInfo *TRI;
105  const ARMSubtarget *STI;
106  const TargetLowering *TL;
107  ARMFunctionInfo *AFI;
108  LivePhysRegs LiveRegs;
109  RegisterClassInfo RegClassInfo;
111  bool LiveRegsValid;
112  bool RegClassInfoValid;
113  bool isThumb1, isThumb2;
114 
115  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
116 
117  bool runOnMachineFunction(MachineFunction &Fn) override;
118 
119  MachineFunctionProperties getRequiredProperties() const override {
122  }
123 
124  StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
125 
126  private:
127  /// A set of load/store MachineInstrs with same base register sorted by
128  /// offset.
129  struct MemOpQueueEntry {
130  MachineInstr *MI;
131  int Offset; ///< Load/Store offset.
132  unsigned Position; ///< Position as counted from end of basic block.
133 
134  MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
135  : MI(&MI), Offset(Offset), Position(Position) {}
136  };
137  using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
138 
139  /// A set of MachineInstrs that fulfill (nearly all) conditions to get
140  /// merged into a LDM/STM.
141  struct MergeCandidate {
142  /// List of instructions ordered by load/store offset.
144 
145  /// Index in Instrs of the instruction being latest in the schedule.
146  unsigned LatestMIIdx;
147 
148  /// Index in Instrs of the instruction being earliest in the schedule.
149  unsigned EarliestMIIdx;
150 
151  /// Index into the basic block where the merged instruction will be
152  /// inserted. (See MemOpQueueEntry.Position)
153  unsigned InsertPos;
154 
155  /// Whether the instructions can be merged into a ldm/stm instruction.
156  bool CanMergeToLSMulti;
157 
158  /// Whether the instructions can be merged into a ldrd/strd instruction.
159  bool CanMergeToLSDouble;
160  };
163  SmallVector<MachineInstr*,4> MergeBaseCandidates;
164 
165  void moveLiveRegsBefore(const MachineBasicBlock &MBB,
167  unsigned findFreeReg(const TargetRegisterClass &RegClass);
168  void UpdateBaseRegUses(MachineBasicBlock &MBB,
169  MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
170  unsigned Base, unsigned WordOffset,
171  ARMCC::CondCodes Pred, unsigned PredReg);
172  MachineInstr *CreateLoadStoreMulti(
174  int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
175  ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
176  ArrayRef<std::pair<unsigned, bool>> Regs,
177  ArrayRef<MachineInstr*> Instrs);
178  MachineInstr *CreateLoadStoreDouble(
180  int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
181  ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
182  ArrayRef<std::pair<unsigned, bool>> Regs,
183  ArrayRef<MachineInstr*> Instrs) const;
184  void FormCandidates(const MemOpQueue &MemOps);
185  MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
186  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
188  bool MergeBaseUpdateLoadStore(MachineInstr *MI);
189  bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
190  bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
191  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
192  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
193  bool CombineMovBx(MachineBasicBlock &MBB);
194  };
195 
196 } // end anonymous namespace
197 
198 char ARMLoadStoreOpt::ID = 0;
199 
200 INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
201  false)
202 
203 static bool definesCPSR(const MachineInstr &MI) {
204  for (const auto &MO : MI.operands()) {
205  if (!MO.isReg())
206  continue;
207  if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
208  // If the instruction has live CPSR def, then it's not safe to fold it
209  // into load / store.
210  return true;
211  }
212 
213  return false;
214 }
215 
216 static int getMemoryOpOffset(const MachineInstr &MI) {
217  unsigned Opcode = MI.getOpcode();
218  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
219  unsigned NumOperands = MI.getDesc().getNumOperands();
220  unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
221 
222  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
223  Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
224  Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
225  Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
226  return OffField;
227 
228  // Thumb1 immediate offsets are scaled by 4
229  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
230  Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
231  return OffField * 4;
232 
233  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
234  : ARM_AM::getAM5Offset(OffField) * 4;
235  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
236  : ARM_AM::getAM5Op(OffField);
237 
238  if (Op == ARM_AM::sub)
239  return -Offset;
240 
241  return Offset;
242 }
243 
245  return MI.getOperand(1);
246 }
247 
249  return MI.getOperand(0);
250 }
251 
252 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
253  switch (Opcode) {
254  default: llvm_unreachable("Unhandled opcode!");
255  case ARM::LDRi12:
256  ++NumLDMGened;
257  switch (Mode) {
258  default: llvm_unreachable("Unhandled submode!");
259  case ARM_AM::ia: return ARM::LDMIA;
260  case ARM_AM::da: return ARM::LDMDA;
261  case ARM_AM::db: return ARM::LDMDB;
262  case ARM_AM::ib: return ARM::LDMIB;
263  }
264  case ARM::STRi12:
265  ++NumSTMGened;
266  switch (Mode) {
267  default: llvm_unreachable("Unhandled submode!");
268  case ARM_AM::ia: return ARM::STMIA;
269  case ARM_AM::da: return ARM::STMDA;
270  case ARM_AM::db: return ARM::STMDB;
271  case ARM_AM::ib: return ARM::STMIB;
272  }
273  case ARM::tLDRi:
274  case ARM::tLDRspi:
275  // tLDMIA is writeback-only - unless the base register is in the input
276  // reglist.
277  ++NumLDMGened;
278  switch (Mode) {
279  default: llvm_unreachable("Unhandled submode!");
280  case ARM_AM::ia: return ARM::tLDMIA;
281  }
282  case ARM::tSTRi:
283  case ARM::tSTRspi:
284  // There is no non-writeback tSTMIA either.
285  ++NumSTMGened;
286  switch (Mode) {
287  default: llvm_unreachable("Unhandled submode!");
288  case ARM_AM::ia: return ARM::tSTMIA_UPD;
289  }
290  case ARM::t2LDRi8:
291  case ARM::t2LDRi12:
292  ++NumLDMGened;
293  switch (Mode) {
294  default: llvm_unreachable("Unhandled submode!");
295  case ARM_AM::ia: return ARM::t2LDMIA;
296  case ARM_AM::db: return ARM::t2LDMDB;
297  }
298  case ARM::t2STRi8:
299  case ARM::t2STRi12:
300  ++NumSTMGened;
301  switch (Mode) {
302  default: llvm_unreachable("Unhandled submode!");
303  case ARM_AM::ia: return ARM::t2STMIA;
304  case ARM_AM::db: return ARM::t2STMDB;
305  }
306  case ARM::VLDRS:
307  ++NumVLDMGened;
308  switch (Mode) {
309  default: llvm_unreachable("Unhandled submode!");
310  case ARM_AM::ia: return ARM::VLDMSIA;
311  case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
312  }
313  case ARM::VSTRS:
314  ++NumVSTMGened;
315  switch (Mode) {
316  default: llvm_unreachable("Unhandled submode!");
317  case ARM_AM::ia: return ARM::VSTMSIA;
318  case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
319  }
320  case ARM::VLDRD:
321  ++NumVLDMGened;
322  switch (Mode) {
323  default: llvm_unreachable("Unhandled submode!");
324  case ARM_AM::ia: return ARM::VLDMDIA;
325  case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
326  }
327  case ARM::VSTRD:
328  ++NumVSTMGened;
329  switch (Mode) {
330  default: llvm_unreachable("Unhandled submode!");
331  case ARM_AM::ia: return ARM::VSTMDIA;
332  case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
333  }
334  }
335 }
336 
338  switch (Opcode) {
339  default: llvm_unreachable("Unhandled opcode!");
340  case ARM::LDMIA_RET:
341  case ARM::LDMIA:
342  case ARM::LDMIA_UPD:
343  case ARM::STMIA:
344  case ARM::STMIA_UPD:
345  case ARM::tLDMIA:
346  case ARM::tLDMIA_UPD:
347  case ARM::tSTMIA_UPD:
348  case ARM::t2LDMIA_RET:
349  case ARM::t2LDMIA:
350  case ARM::t2LDMIA_UPD:
351  case ARM::t2STMIA:
352  case ARM::t2STMIA_UPD:
353  case ARM::VLDMSIA:
354  case ARM::VLDMSIA_UPD:
355  case ARM::VSTMSIA:
356  case ARM::VSTMSIA_UPD:
357  case ARM::VLDMDIA:
358  case ARM::VLDMDIA_UPD:
359  case ARM::VSTMDIA:
360  case ARM::VSTMDIA_UPD:
361  return ARM_AM::ia;
362 
363  case ARM::LDMDA:
364  case ARM::LDMDA_UPD:
365  case ARM::STMDA:
366  case ARM::STMDA_UPD:
367  return ARM_AM::da;
368 
369  case ARM::LDMDB:
370  case ARM::LDMDB_UPD:
371  case ARM::STMDB:
372  case ARM::STMDB_UPD:
373  case ARM::t2LDMDB:
374  case ARM::t2LDMDB_UPD:
375  case ARM::t2STMDB:
376  case ARM::t2STMDB_UPD:
377  case ARM::VLDMSDB_UPD:
378  case ARM::VSTMSDB_UPD:
379  case ARM::VLDMDDB_UPD:
380  case ARM::VSTMDDB_UPD:
381  return ARM_AM::db;
382 
383  case ARM::LDMIB:
384  case ARM::LDMIB_UPD:
385  case ARM::STMIB:
386  case ARM::STMIB_UPD:
387  return ARM_AM::ib;
388  }
389 }
390 
391 static bool isT1i32Load(unsigned Opc) {
392  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
393 }
394 
395 static bool isT2i32Load(unsigned Opc) {
396  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
397 }
398 
399 static bool isi32Load(unsigned Opc) {
400  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
401 }
402 
403 static bool isT1i32Store(unsigned Opc) {
404  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
405 }
406 
407 static bool isT2i32Store(unsigned Opc) {
408  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
409 }
410 
411 static bool isi32Store(unsigned Opc) {
412  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
413 }
414 
415 static bool isLoadSingle(unsigned Opc) {
416  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
417 }
418 
419 static unsigned getImmScale(unsigned Opc) {
420  switch (Opc) {
421  default: llvm_unreachable("Unhandled opcode!");
422  case ARM::tLDRi:
423  case ARM::tSTRi:
424  case ARM::tLDRspi:
425  case ARM::tSTRspi:
426  return 1;
427  case ARM::tLDRHi:
428  case ARM::tSTRHi:
429  return 2;
430  case ARM::tLDRBi:
431  case ARM::tSTRBi:
432  return 4;
433  }
434 }
435 
436 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
437  switch (MI->getOpcode()) {
438  default: return 0;
439  case ARM::LDRi12:
440  case ARM::STRi12:
441  case ARM::tLDRi:
442  case ARM::tSTRi:
443  case ARM::tLDRspi:
444  case ARM::tSTRspi:
445  case ARM::t2LDRi8:
446  case ARM::t2LDRi12:
447  case ARM::t2STRi8:
448  case ARM::t2STRi12:
449  case ARM::VLDRS:
450  case ARM::VSTRS:
451  return 4;
452  case ARM::VLDRD:
453  case ARM::VSTRD:
454  return 8;
455  case ARM::LDMIA:
456  case ARM::LDMDA:
457  case ARM::LDMDB:
458  case ARM::LDMIB:
459  case ARM::STMIA:
460  case ARM::STMDA:
461  case ARM::STMDB:
462  case ARM::STMIB:
463  case ARM::tLDMIA:
464  case ARM::tLDMIA_UPD:
465  case ARM::tSTMIA_UPD:
466  case ARM::t2LDMIA:
467  case ARM::t2LDMDB:
468  case ARM::t2STMIA:
469  case ARM::t2STMDB:
470  case ARM::VLDMSIA:
471  case ARM::VSTMSIA:
472  return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
473  case ARM::VLDMDIA:
474  case ARM::VSTMDIA:
475  return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
476  }
477 }
478 
479 /// Update future uses of the base register with the offset introduced
480 /// due to writeback. This function only works on Thumb1.
481 void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
483  const DebugLoc &DL, unsigned Base,
484  unsigned WordOffset,
485  ARMCC::CondCodes Pred,
486  unsigned PredReg) {
487  assert(isThumb1 && "Can only update base register uses for Thumb1!");
488  // Start updating any instructions with immediate offsets. Insert a SUB before
489  // the first non-updateable instruction (if any).
490  for (; MBBI != MBB.end(); ++MBBI) {
491  bool InsertSub = false;
492  unsigned Opc = MBBI->getOpcode();
493 
494  if (MBBI->readsRegister(Base)) {
495  int Offset;
496  bool IsLoad =
497  Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
498  bool IsStore =
499  Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
500 
501  if (IsLoad || IsStore) {
502  // Loads and stores with immediate offsets can be updated, but only if
503  // the new offset isn't negative.
504  // The MachineOperand containing the offset immediate is the last one
505  // before predicates.
506  MachineOperand &MO =
507  MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
508  // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
509  Offset = MO.getImm() - WordOffset * getImmScale(Opc);
510 
511  // If storing the base register, it needs to be reset first.
512  Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
513 
514  if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
515  MO.setImm(Offset);
516  else
517  InsertSub = true;
518  } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
519  !definesCPSR(*MBBI)) {
520  // SUBS/ADDS using this register, with a dead def of the CPSR.
521  // Merge it with the update; if the merged offset is too large,
522  // insert a new sub instead.
523  MachineOperand &MO =
524  MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
525  Offset = (Opc == ARM::tSUBi8) ?
526  MO.getImm() + WordOffset * 4 :
527  MO.getImm() - WordOffset * 4 ;
528  if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
529  // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
530  // Offset == 0.
531  MO.setImm(Offset);
532  // The base register has now been reset, so exit early.
533  return;
534  } else {
535  InsertSub = true;
536  }
537  } else {
538  // Can't update the instruction.
539  InsertSub = true;
540  }
541  } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
542  // Since SUBS sets the condition flags, we can't place the base reset
543  // after an instruction that has a live CPSR def.
544  // The base register might also contain an argument for a function call.
545  InsertSub = true;
546  }
547 
548  if (InsertSub) {
549  // An instruction above couldn't be updated, so insert a sub.
550  BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
551  .add(t1CondCodeOp(true))
552  .addReg(Base)
553  .addImm(WordOffset * 4)
554  .addImm(Pred)
555  .addReg(PredReg);
556  return;
557  }
558 
559  if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
560  // Register got killed. Stop updating.
561  return;
562  }
563 
564  // End of block was reached.
565  if (MBB.succ_size() > 0) {
566  // FIXME: Because of a bug, live registers are sometimes missing from
567  // the successor blocks' live-in sets. This means we can't trust that
568  // information and *always* have to reset at the end of a block.
569  // See PR21029.
570  if (MBBI != MBB.end()) --MBBI;
571  BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
572  .add(t1CondCodeOp(true))
573  .addReg(Base)
574  .addImm(WordOffset * 4)
575  .addImm(Pred)
576  .addReg(PredReg);
577  }
578 }
579 
580 /// Return the first register of class \p RegClass that is not in \p Regs.
581 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
582  if (!RegClassInfoValid) {
583  RegClassInfo.runOnMachineFunction(*MF);
584  RegClassInfoValid = true;
585  }
586 
587  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
588  if (!LiveRegs.contains(Reg))
589  return Reg;
590  return 0;
591 }
592 
593 /// Compute live registers just before instruction \p Before (in normal schedule
594 /// direction). Computes backwards so multiple queries in the same block must
595 /// come in reverse order.
596 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
598  // Initialize if we never queried in this block.
599  if (!LiveRegsValid) {
600  LiveRegs.init(*TRI);
601  LiveRegs.addLiveOuts(MBB);
602  LiveRegPos = MBB.end();
603  LiveRegsValid = true;
604  }
605  // Move backward just before the "Before" position.
606  while (LiveRegPos != Before) {
607  --LiveRegPos;
608  LiveRegs.stepBackward(*LiveRegPos);
609  }
610 }
611 
612 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
613  unsigned Reg) {
614  for (const std::pair<unsigned, bool> &R : Regs)
615  if (R.first == Reg)
616  return true;
617  return false;
618 }
619 
620 /// Create and insert a LDM or STM with Base as base register and registers in
621 /// Regs as the register operands that would be loaded / stored. It returns
622 /// the newly created instruction, or nullptr if no merge was performed.
623 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
625  int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
626  ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
627  ArrayRef<std::pair<unsigned, bool>> Regs,
628  ArrayRef<MachineInstr*> Instrs) {
629  unsigned NumRegs = Regs.size();
630  assert(NumRegs > 1);
631 
632  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
633  // Compute liveness information for that register to make the decision.
634  bool SafeToClobberCPSR = !isThumb1 ||
635  (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
637 
638  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
639 
640  // Exception: If the base register is in the input reglist, Thumb1 LDM is
641  // non-writeback.
642  // It's also not possible to merge an STR of the base register in Thumb1.
643  if (isThumb1 && ContainsReg(Regs, Base)) {
644  assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
645  if (Opcode == ARM::tLDRi)
646  Writeback = false;
647  else if (Opcode == ARM::tSTRi)
648  return nullptr;
649  }
650 
652  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
653  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
654  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
655 
656  if (Offset == 4 && haveIBAndDA) {
657  Mode = ARM_AM::ib;
658  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
659  Mode = ARM_AM::da;
660  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
661  // VLDM/VSTM do not support DB mode without also updating the base reg.
662  Mode = ARM_AM::db;
663  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
664  // Check if this is a supported opcode before inserting instructions to
665  // calculate a new base register.
666  if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
667 
668  // If starting offset isn't zero, insert a MI to materialize a new base.
669  // But only do so if it is cost effective, i.e. merging more than two
670  // loads / stores.
671  if (NumRegs <= 2)
672  return nullptr;
673 
674  // On Thumb1, it's not worth materializing a new base register without
675  // clobbering the CPSR (i.e. not using ADDS/SUBS).
676  if (!SafeToClobberCPSR)
677  return nullptr;
678 
679  unsigned NewBase;
680  if (isi32Load(Opcode)) {
681  // If it is a load, then just use one of the destination registers
682  // as the new base. Will no longer be writeback in Thumb1.
683  NewBase = Regs[NumRegs-1].first;
684  Writeback = false;
685  } else {
686  // Find a free register that we can use as scratch register.
687  moveLiveRegsBefore(MBB, InsertBefore);
688  // The merged instruction does not exist yet but will use several Regs if
689  // it is a Store.
690  if (!isLoadSingle(Opcode))
691  for (const std::pair<unsigned, bool> &R : Regs)
692  LiveRegs.addReg(R.first);
693 
694  NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
695  if (NewBase == 0)
696  return nullptr;
697  }
698 
699  int BaseOpc =
700  isThumb2 ? ARM::t2ADDri :
701  (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
702  (isThumb1 && Offset < 8) ? ARM::tADDi3 :
703  isThumb1 ? ARM::tADDi8 : ARM::ADDri;
704 
705  if (Offset < 0) {
706  Offset = - Offset;
707  BaseOpc =
708  isThumb2 ? ARM::t2SUBri :
709  (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
710  isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
711  }
712 
713  if (!TL->isLegalAddImmediate(Offset))
714  // FIXME: Try add with register operand?
715  return nullptr; // Probably not worth it then.
716 
717  // We can only append a kill flag to the add/sub input if the value is not
718  // used in the register list of the stm as well.
719  bool KillOldBase = BaseKill &&
720  (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
721 
722  if (isThumb1) {
723  // Thumb1: depending on immediate size, use either
724  // ADDS NewBase, Base, #imm3
725  // or
726  // MOV NewBase, Base
727  // ADDS NewBase, #imm8.
728  if (Base != NewBase &&
729  (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
730  // Need to insert a MOV to the new base first.
731  if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
732  !STI->hasV6Ops()) {
733  // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
734  if (Pred != ARMCC::AL)
735  return nullptr;
736  BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
737  .addReg(Base, getKillRegState(KillOldBase));
738  } else
739  BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
740  .addReg(Base, getKillRegState(KillOldBase))
741  .add(predOps(Pred, PredReg));
742 
743  // The following ADDS/SUBS becomes an update.
744  Base = NewBase;
745  KillOldBase = true;
746  }
747  if (BaseOpc == ARM::tADDrSPi) {
748  assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
749  BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
750  .addReg(Base, getKillRegState(KillOldBase))
751  .addImm(Offset / 4)
752  .add(predOps(Pred, PredReg));
753  } else
754  BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
755  .add(t1CondCodeOp(true))
756  .addReg(Base, getKillRegState(KillOldBase))
757  .addImm(Offset)
758  .add(predOps(Pred, PredReg));
759  } else {
760  BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
761  .addReg(Base, getKillRegState(KillOldBase))
762  .addImm(Offset)
763  .add(predOps(Pred, PredReg))
764  .add(condCodeOp());
765  }
766  Base = NewBase;
767  BaseKill = true; // New base is always killed straight away.
768  }
769 
770  bool isDef = isLoadSingle(Opcode);
771 
772  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
773  // base register writeback.
774  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
775  if (!Opcode)
776  return nullptr;
777 
778  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
779  // - There is no writeback (LDM of base register),
780  // - the base register is killed by the merged instruction,
781  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
782  // to reset the base register.
783  // Otherwise, don't merge.
784  // It's safe to return here since the code to materialize a new base register
785  // above is also conditional on SafeToClobberCPSR.
786  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
787  return nullptr;
788 
790 
791  if (Writeback) {
792  assert(isThumb1 && "expected Writeback only inThumb1");
793  if (Opcode == ARM::tLDMIA) {
794  assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
795  // Update tLDMIA with writeback if necessary.
796  Opcode = ARM::tLDMIA_UPD;
797  }
798 
799  MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
800 
801  // Thumb1: we might need to set base writeback when building the MI.
802  MIB.addReg(Base, getDefRegState(true))
803  .addReg(Base, getKillRegState(BaseKill));
804 
805  // The base isn't dead after a merged instruction with writeback.
806  // Insert a sub instruction after the newly formed instruction to reset.
807  if (!BaseKill)
808  UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
809  } else {
810  // No writeback, simply build the MachineInstr.
811  MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
812  MIB.addReg(Base, getKillRegState(BaseKill));
813  }
814 
815  MIB.addImm(Pred).addReg(PredReg);
816 
817  for (const std::pair<unsigned, bool> &R : Regs)
818  MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
819 
820  MIB.cloneMergedMemRefs(Instrs);
821 
822  return MIB.getInstr();
823 }
824 
825 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
827  int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
828  ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
829  ArrayRef<std::pair<unsigned, bool>> Regs,
830  ArrayRef<MachineInstr*> Instrs) const {
831  bool IsLoad = isi32Load(Opcode);
832  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
833  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
834 
835  assert(Regs.size() == 2);
836  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
837  TII->get(LoadStoreOpcode));
838  if (IsLoad) {
839  MIB.addReg(Regs[0].first, RegState::Define)
840  .addReg(Regs[1].first, RegState::Define);
841  } else {
842  MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
843  .addReg(Regs[1].first, getKillRegState(Regs[1].second));
844  }
845  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
846  MIB.cloneMergedMemRefs(Instrs);
847  return MIB.getInstr();
848 }
849 
850 /// Merge the instructions of \p Cand, erase the originals on success, and return the new instruction (nullptr on failure).
851 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
852  const MachineInstr *First = Cand.Instrs.front();
853  unsigned Opcode = First->getOpcode();
854  bool IsLoad = isLoadSingle(Opcode);
856  SmallVector<unsigned, 4> ImpDefs;
857  DenseSet<unsigned> KilledRegs;
858  DenseSet<unsigned> UsedRegs;
859  // Determine list of registers and list of implicit super-register defs.
860  for (const MachineInstr *MI : Cand.Instrs) {
861  const MachineOperand &MO = getLoadStoreRegOp(*MI);
862  Register Reg = MO.getReg();
863  bool IsKill = MO.isKill();
864  if (IsKill)
865  KilledRegs.insert(Reg);
866  Regs.push_back(std::make_pair(Reg, IsKill));
867  UsedRegs.insert(Reg);
868 
869  if (IsLoad) {
870  // Collect any implicit defs of super-registers, after merging we can't
871  // be sure anymore that we properly preserved these live ranges and must
872  // remove these implicit operands.
873  for (const MachineOperand &MO : MI->implicit_operands()) {
874  if (!MO.isReg() || !MO.isDef() || MO.isDead())
875  continue;
876  assert(MO.isImplicit());
877  Register DefReg = MO.getReg();
878 
879  if (is_contained(ImpDefs, DefReg))
880  continue;
881  // We can ignore cases where the super-reg is read and written.
882  if (MI->readsRegister(DefReg))
883  continue;
884  ImpDefs.push_back(DefReg);
885  }
886  }
887  }
888 
889  // Attempt the merge.
890  using iterator = MachineBasicBlock::iterator;
891 
892  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
893  iterator InsertBefore = std::next(iterator(LatestMI));
894  MachineBasicBlock &MBB = *LatestMI->getParent();
895  unsigned Offset = getMemoryOpOffset(*First);
896  Register Base = getLoadStoreBaseOp(*First).getReg();
897  bool BaseKill = LatestMI->killsRegister(Base);
898  unsigned PredReg = 0;
899  ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
900  DebugLoc DL = First->getDebugLoc();
901  MachineInstr *Merged = nullptr;
902  if (Cand.CanMergeToLSDouble)
903  Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
904  Opcode, Pred, PredReg, DL, Regs,
905  Cand.Instrs);
906  if (!Merged && Cand.CanMergeToLSMulti)
907  Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
908  Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
909  if (!Merged)
910  return nullptr;
911 
912  // Determine earliest instruction that will get removed. We then keep an
913  // iterator just above it so the following erases don't invalidate it.
914  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
915  bool EarliestAtBegin = false;
916  if (EarliestI == MBB.begin()) {
917  EarliestAtBegin = true;
918  } else {
919  EarliestI = std::prev(EarliestI);
920  }
921 
922  // Remove instructions which have been merged.
923  for (MachineInstr *MI : Cand.Instrs)
924  MBB.erase(MI);
925 
926  // Determine range between the earliest removed instruction and the new one.
927  if (EarliestAtBegin)
928  EarliestI = MBB.begin();
929  else
930  EarliestI = std::next(EarliestI);
931  auto FixupRange = make_range(EarliestI, iterator(Merged));
932 
933  if (isLoadSingle(Opcode)) {
934  // If the previous loads defined a super-reg, then we have to mark earlier
935  // operands undef; Replicate the super-reg def on the merged instruction.
936  for (MachineInstr &MI : FixupRange) {
937  for (unsigned &ImpDefReg : ImpDefs) {
938  for (MachineOperand &MO : MI.implicit_operands()) {
939  if (!MO.isReg() || MO.getReg() != ImpDefReg)
940  continue;
941  if (MO.readsReg())
942  MO.setIsUndef();
943  else if (MO.isDef())
944  ImpDefReg = 0;
945  }
946  }
947  }
948 
949  MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
950  for (unsigned ImpDef : ImpDefs)
951  MIB.addReg(ImpDef, RegState::ImplicitDefine);
952  } else {
953  // Remove kill flags: We are possibly storing the values later now.
954  assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
955  for (MachineInstr &MI : FixupRange) {
956  for (MachineOperand &MO : MI.uses()) {
957  if (!MO.isReg() || !MO.isKill())
958  continue;
959  if (UsedRegs.count(MO.getReg()))
960  MO.setIsKill(false);
961  }
962  }
963  assert(ImpDefs.empty());
964  }
965 
966  return Merged;
967 }
968 
/// Return true if \p Offset can be used as the immediate of a
/// t2LDRDi8/t2STRDi8 instruction.
///
/// Those instructions support an 8 bit immediate which is internally
/// multiplied by 4, so the offset has to be a multiple of 4 whose magnitude is
/// below 1024.
static bool isValidLSDoubleOffset(int Offset) {
  // Check both bounds directly rather than via abs(Offset): abs() has
  // undefined behaviour for the most negative int, while the signed range
  // check is well defined for every input and agrees with the magnitude test
  // everywhere else.
  return (Offset % 4) == 0 && Offset > -1024 && Offset < 1024;
}
975 
976 /// Return true for loads/stores that can be combined to a double/multi
977 /// operation without increasing the requirements for alignment.
// NOTE(review): listing lines 978 and 989 are absent from this extract (the
// embedded numbering jumps 977 -> 979 and 988 -> 990). The start of the
// signature and the second operand of the SP condition are therefore not
// visible here and must be restored from the upstream file.
979  const MachineInstr &MI) {
980  // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
981  // difference.
982  unsigned Opcode = MI.getOpcode();
983  if (!isi32Load(Opcode) && !isi32Store(Opcode))
984  return true;
985 
986  // Stack pointer alignment is out of the programmer's control so we can trust
987  // SP-relative loads/stores.
988  if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
990  return true;
// Any other i32 load/store is reported as not safe to combine.
991  return false;
992 }
993 
994 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
///
/// \p MemOps must be non-empty: entry 0 is read unconditionally and the outer
/// loop is a do/while. The queue is split into consecutive runs; a run is
/// extended while the next entry's offset equals the previous offset plus the
/// transfer size and the register constraints below hold. Every entry ends up
/// in exactly one MergeCandidate appended to Candidates; a run of length one
/// gets both merge flags cleared.
995 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
996  const MachineInstr *FirstMI = MemOps[0].MI;
997  unsigned Opcode = FirstMI->getOpcode();
998  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
999  unsigned Size = getLSMultipleTransferSize(FirstMI);
1000 
1001  unsigned SIndex = 0;
1002  unsigned EIndex = MemOps.size();
1003  do {
1004  // Look at the first instruction.
1005  const MachineInstr *MI = MemOps[SIndex].MI;
1006  int Offset = MemOps[SIndex].Offset;
1007  const MachineOperand &PMO = getLoadStoreRegOp(*MI);
1008  Register PReg = PMO.getReg();
// An undef register operand gets the maximum unsigned value instead of a
// real encoding value.
1009  unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
1010  : TRI->getEncodingValue(PReg);
1011  unsigned Latest = SIndex;
1012  unsigned Earliest = SIndex;
1013  unsigned Count = 1;
1014  bool CanMergeToLSDouble =
1015  STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
1016  // ARM errata 602117: LDRD with base in list may result in incorrect base
1017  // register when interrupted or faulted.
1018  if (STI->isCortexM3() && isi32Load(Opcode) &&
1019  PReg == getLoadStoreBaseOp(*MI).getReg())
1020  CanMergeToLSDouble = false;
1021 
1022  bool CanMergeToLSMulti = true;
1023  // On Swift, avoid vldm/vstm starting with an odd register number as that
1024  // needs more uops than single vldrs.
1025  if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1026  CanMergeToLSMulti = false;
1027 
1028  // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
1029  // deprecated; LDM to PC is fine but cannot happen here.
1030  if (PReg == ARM::SP || PReg == ARM::PC)
1031  CanMergeToLSMulti = CanMergeToLSDouble = false;
1032 
1033  // Should we be conservative?
// NOTE(review): listing line 1034, which holds the condition guarding the
// assignment below, is absent from this extract (numbering jumps
// 1033 -> 1035). Restore it from the upstream file.
1035  CanMergeToLSMulti = CanMergeToLSDouble = false;
1036 
1037  // vldm / vstm limit are 32 for S variants, 16 for D variants.
// NOTE(review): only the D-variant limit is set explicitly here; every other
// opcode, including the S variants, gets UINT_MAX.
1038  unsigned Limit;
1039  switch (Opcode) {
1040  default:
1041  Limit = UINT_MAX;
1042  break;
1043  case ARM::VLDRD:
1044  case ARM::VSTRD:
1045  Limit = 16;
1046  break;
1047  }
1048 
1049  // Merge following instructions where possible.
1050  for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
1051  int NewOffset = MemOps[I].Offset;
// The next entry must access the address directly following the previous one.
1052  if (NewOffset != Offset + (int)Size)
1053  break;
1054  const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
1055  Register Reg = MO.getReg();
1056  if (Reg == ARM::SP || Reg == ARM::PC)
1057  break;
1058  if (Count == Limit)
1059  break;
1060 
1061  // See if the current load/store may be part of a multi load/store.
1062  unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
1063  : TRI->getEncodingValue(Reg);
1064  bool PartOfLSMulti = CanMergeToLSMulti;
1065  if (PartOfLSMulti) {
1066  // Register numbers must be in ascending order.
1067  if (RegNum <= PRegNum)
1068  PartOfLSMulti = false;
1069  // For VFP / NEON load/store multiples, the registers must be
1070  // consecutive and within the limit on the number of registers per
1071  // instruction.
1072  else if (!isNotVFP && RegNum != PRegNum+1)
1073  PartOfLSMulti = false;
1074  }
1075  // See if the current load/store may be part of a double load/store.
// Count <= 1 restricts a double candidate to exactly two entries.
1076  bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1077 
1078  if (!PartOfLSMulti && !PartOfLSDouble)
1079  break;
1080  CanMergeToLSMulti &= PartOfLSMulti;
1081  CanMergeToLSDouble &= PartOfLSDouble;
1082  // Track MemOp with latest and earliest position (Positions are
1083  // counted in reverse).
1084  unsigned Position = MemOps[I].Position;
1085  if (Position < MemOps[Latest].Position)
1086  Latest = I;
1087  else if (Position > MemOps[Earliest].Position)
1088  Earliest = I;
1089  // Prepare for next MemOp.
1090  Offset += Size;
1091  PRegNum = RegNum;
1092  }
1093 
1094  // Form a candidate from the Ops collected so far.
1095  MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1096  for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1097  Candidate->Instrs.push_back(MemOps[C].MI);
// Latest/Earliest index into MemOps; the candidate stores them relative to the
// start of its own Instrs list.
1098  Candidate->LatestMIIdx = Latest - SIndex;
1099  Candidate->EarliestMIIdx = Earliest - SIndex;
1100  Candidate->InsertPos = MemOps[Latest].Position;
// A single instruction cannot be merged with anything.
1101  if (Count == 1)
1102  CanMergeToLSMulti = CanMergeToLSDouble = false;
1103  Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1104  Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1105  Candidates.push_back(Candidate);
1106  // Continue after the chain.
1107  SIndex += Count;
1108  } while (SIndex < EIndex);
1109 }
1110 
/// Map the load/store multiple opcode \p Opc and the addressing submode Mode
/// to the corresponding base-updating (_UPD) opcode.
///
/// The ARM LDM/STM opcodes accept the ia, ib, da and db submodes; the Thumb2
/// and VFP opcodes accept only ia and db. Any other opcode or submode reaches
/// llvm_unreachable.
1111 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
// NOTE(review): listing line 1112, which declares the Mode parameter and opens
// the function body, is absent from this extract (numbering jumps
// 1111 -> 1113). Restore it from the upstream file.
1113  switch (Opc) {
1114  default: llvm_unreachable("Unhandled opcode!");
1115  case ARM::LDMIA:
1116  case ARM::LDMDA:
1117  case ARM::LDMDB:
1118  case ARM::LDMIB:
1119  switch (Mode) {
1120  default: llvm_unreachable("Unhandled submode!");
1121  case ARM_AM::ia: return ARM::LDMIA_UPD;
1122  case ARM_AM::ib: return ARM::LDMIB_UPD;
1123  case ARM_AM::da: return ARM::LDMDA_UPD;
1124  case ARM_AM::db: return ARM::LDMDB_UPD;
1125  }
1126  case ARM::STMIA:
1127  case ARM::STMDA:
1128  case ARM::STMDB:
1129  case ARM::STMIB:
1130  switch (Mode) {
1131  default: llvm_unreachable("Unhandled submode!");
1132  case ARM_AM::ia: return ARM::STMIA_UPD;
1133  case ARM_AM::ib: return ARM::STMIB_UPD;
1134  case ARM_AM::da: return ARM::STMDA_UPD;
1135  case ARM_AM::db: return ARM::STMDB_UPD;
1136  }
1137  case ARM::t2LDMIA:
1138  case ARM::t2LDMDB:
1139  switch (Mode) {
1140  default: llvm_unreachable("Unhandled submode!");
1141  case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1142  case ARM_AM::db: return ARM::t2LDMDB_UPD;
1143  }
1144  case ARM::t2STMIA:
1145  case ARM::t2STMDB:
1146  switch (Mode) {
1147  default: llvm_unreachable("Unhandled submode!");
1148  case ARM_AM::ia: return ARM::t2STMIA_UPD;
1149  case ARM_AM::db: return ARM::t2STMDB_UPD;
1150  }
1151  case ARM::VLDMSIA:
1152  switch (Mode) {
1153  default: llvm_unreachable("Unhandled submode!");
1154  case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1155  case ARM_AM::db: return ARM::VLDMSDB_UPD;
1156  }
1157  case ARM::VLDMDIA:
1158  switch (Mode) {
1159  default: llvm_unreachable("Unhandled submode!");
1160  case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1161  case ARM_AM::db: return ARM::VLDMDDB_UPD;
1162  }
1163  case ARM::VSTMSIA:
1164  switch (Mode) {
1165  default: llvm_unreachable("Unhandled submode!");
1166  case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1167  case ARM_AM::db: return ARM::VSTMSDB_UPD;
1168  }
1169  case ARM::VSTMDIA:
1170  switch (Mode) {
1171  default: llvm_unreachable("Unhandled submode!");
1172  case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1173  case ARM_AM::db: return ARM::VSTMDDB_UPD;
1174  }
1175  }
1176 }
1177 
1178 /// Check if the given instruction increments or decrements a register and
1179 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1180 /// generated by the instruction are possibly read as well.
1181 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
1182  ARMCC::CondCodes Pred, unsigned PredReg) {
1183  bool CheckCPSRDef;
1184  int Scale;
1185  switch (MI.getOpcode()) {
1186  case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1187  case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1188  case ARM::t2SUBri:
1189  case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1190  case ARM::t2ADDri:
1191  case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1192  case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1193  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
1194  default: return 0;
1195  }
1196 
1197  unsigned MIPredReg;
1198  if (MI.getOperand(0).getReg() != Reg ||
1199  MI.getOperand(1).getReg() != Reg ||
1200  getInstrPredicate(MI, MIPredReg) != Pred ||
1201  MIPredReg != PredReg)
1202  return 0;
1203 
1204  if (CheckCPSRDef && definesCPSR(MI))
1205  return 0;
1206  return MI.getOperand(2).getImm() * Scale;
1207 }
1208 
1209 /// Searches for an increment or decrement of \p Reg before \p MBBI.
///
/// On return \p Offset holds the amount reported by isIncrementOrDecrement for
/// the instruction found, or 0 if there is none. The result is an iterator to
/// that instruction, or the end iterator of the block when \p Offset is 0.
// NOTE(review): listing lines 1210-1211, which start the signature (return
// type, function name and the MBBI/Reg parameters), are absent from this
// extract (numbering jumps 1209 -> 1212). Callers in this file refer to this
// routine as findIncDecBefore - confirm against the upstream file.
1212  ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1213  Offset = 0;
1214  MachineBasicBlock &MBB = *MBBI->getParent();
1215  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1216  MachineBasicBlock::iterator EndMBBI = MBB.end();
// Nothing precedes the first instruction of the block.
1217  if (MBBI == BeginMBBI)
1218  return EndMBBI;
1219 
1220  // Skip debug values.
1221  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1222  while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1223  --PrevMBBI;
1224 
// Only the closest preceding non-debug instruction is examined.
1225  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1226  return Offset == 0 ? EndMBBI : PrevMBBI;
1227 }
1228 
1229 /// Searches for an increment or decrement of \p Reg after \p MBBI.
///
/// On return \p Offset holds the amount reported by isIncrementOrDecrement for
/// the instruction found, or 0 if there is none. The result is an iterator to
/// that instruction, or the end iterator of the block when \p Offset is 0.
// NOTE(review): listing lines 1230-1231, which start the signature (return
// type, function name and the MBBI/Reg parameters), are absent from this
// extract (numbering jumps 1229 -> 1232). Callers in this file refer to this
// routine as findIncDecAfter - confirm against the upstream file.
1232  ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1233  Offset = 0;
1234  MachineBasicBlock &MBB = *MBBI->getParent();
1235  MachineBasicBlock::iterator EndMBBI = MBB.end();
1236  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1237  // Skip debug values.
1238  while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1239  ++NextMBBI;
// Only debug instructions (or nothing at all) follow MBBI.
1240  if (NextMBBI == EndMBBI)
1241  return EndMBBI;
1242 
// Only the closest following non-debug instruction is examined.
1243  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1244  return Offset == 0 ? EndMBBI : NextMBBI;
1245 }
1246 
1247 /// Fold preceding/trailing inc/dec of base register into the
1248 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1249 ///
1250 /// stmia rn, <ra, rb, rc>
1251 /// rn := rn + 4 * 3;
1252 /// =>
1253 /// stmia rn!, <ra, rb, rc>
1254 ///
1255 /// rn := rn - 4 * 3;
1256 /// ldmia rn, <ra, rb, rc>
1257 /// =>
1258 /// ldmdb rn!, <ra, rb, rc>
///
/// \returns true if \p MI was replaced by an updating instruction (in which
/// case \p MI has been erased), false if nothing was changed.
1259 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1260  // Thumb1 is already using updating loads/stores.
1261  if (isThumb1) return false;
1262 
// Operand 0 of the load/store multiple is the base register.
1263  const MachineOperand &BaseOP = MI->getOperand(0);
1264  Register Base = BaseOP.getReg();
1265  bool BaseKill = BaseOP.isKill();
1266  unsigned PredReg = 0;
1267  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1268  unsigned Opcode = MI->getOpcode();
1269  DebugLoc DL = MI->getDebugLoc();
1270 
1271  // Can't use an updating ld/st if the base register is also a dest
1272  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1273  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1274  if (MI->getOperand(i).getReg() == Base)
1275  return false;
1276 
1277  int Bytes = getLSMultipleTransferSize(MI);
1278  MachineBasicBlock &MBB = *MI->getParent();
1279  MachineBasicBlock::iterator MBBI(MI);
1280  int Offset;
1281  MachineBasicBlock::iterator MergeInstr
1282  = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
// NOTE(review): listing line 1283, which declares the `Mode` variable used
// below, is absent from this extract (numbering jumps 1282 -> 1284). Restore
// it from the upstream file.
// A preceding decrement by exactly the transfer size turns ia into db and ib
// into da.
1284  if (Mode == ARM_AM::ia && Offset == -Bytes) {
1285  Mode = ARM_AM::db;
1286  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
1287  Mode = ARM_AM::da;
1288  } else {
// Otherwise look for a trailing update in the direction of the current
// submode: +Bytes for ia/ib, -Bytes for da/db.
1289  MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1290  if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1291  ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
1292 
1293  // We couldn't find an inc/dec to merge. But if the base is dead, we
1294  // can still change to a writeback form as that will save us 2 bytes
1295  // of code size. It can create WAW hazards though, so only do it if
1296  // we're minimizing code size.
1297  if (!STI->hasMinSize() || !BaseKill)
1298  return false;
1299 
// The size-only conversion is skipped when any operand from index 2 onward is
// a register numbered ARM::R8 or above.
1300  bool HighRegsUsed = false;
1301  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1302  if (MI->getOperand(i).getReg() >= ARM::R8) {
1303  HighRegsUsed = true;
1304  break;
1305  }
1306 
// Setting MergeInstr to end() means no add/sub instruction gets erased below.
1307  if (!HighRegsUsed)
1308  MergeInstr = MBB.end();
1309  else
1310  return false;
1311  }
1312  }
1313  if (MergeInstr != MBB.end())
1314  MBB.erase(MergeInstr);
1315 
1316  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1317  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1318  .addReg(Base, getDefRegState(true)) // WB base register
1319  .addReg(Base, getKillRegState(BaseKill))
1320  .addImm(Pred).addReg(PredReg);
1321 
1322  // Transfer the rest of operands.
1323  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1324  MIB.add(MI->getOperand(OpNum));
1325 
1326  // Transfer memoperands.
1327  MIB.setMemRefs(MI->memoperands());
1328 
// The new instruction was inserted before MBBI; remove the original.
1329  MBB.erase(MBBI);
1330  return true;
1331 }
1332 
/// Return the opcode that replaces the single load/store \p Opc when a
/// preceding base update is folded into it.
///
/// For the integer opcodes the result is the pre-indexed form and Mode is not
/// consulted. For VLDR/VSTR the result is an updating load/store multiple: the
/// IA_UPD form when Mode is ARM_AM::add, otherwise the DB_UPD form. Any other
/// opcode reaches llvm_unreachable.
1333 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
// NOTE(review): listing line 1334, which declares the Mode parameter and opens
// the function body, is absent from this extract (numbering jumps
// 1333 -> 1335). Restore it from the upstream file.
1335  switch (Opc) {
1336  case ARM::LDRi12:
1337  return ARM::LDR_PRE_IMM;
1338  case ARM::STRi12:
1339  return ARM::STR_PRE_IMM;
1340  case ARM::VLDRS:
1341  return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1342  case ARM::VLDRD:
1343  return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1344  case ARM::VSTRS:
1345  return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1346  case ARM::VSTRD:
1347  return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1348  case ARM::t2LDRi8:
1349  case ARM::t2LDRi12:
1350  return ARM::t2LDR_PRE;
1351  case ARM::t2STRi8:
1352  case ARM::t2STRi12:
1353  return ARM::t2STR_PRE;
1354  default: llvm_unreachable("Unhandled opcode!");
1355  }
1356 }
1357 
/// Return the opcode that replaces the single load/store \p Opc when a
/// trailing base update is folded into it.
///
/// For the integer opcodes the result is the post-indexed form and Mode is not
/// consulted. For VLDR/VSTR the result is an updating load/store multiple: the
/// IA_UPD form when Mode is ARM_AM::add, otherwise the DB_UPD form. Any other
/// opcode reaches llvm_unreachable.
1358 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
// NOTE(review): listing line 1359, which declares the Mode parameter and opens
// the function body, is absent from this extract (numbering jumps
// 1358 -> 1360). Restore it from the upstream file.
1360  switch (Opc) {
1361  case ARM::LDRi12:
1362  return ARM::LDR_POST_IMM;
1363  case ARM::STRi12:
1364  return ARM::STR_POST_IMM;
1365  case ARM::VLDRS:
1366  return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1367  case ARM::VLDRD:
1368  return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1369  case ARM::VSTRS:
1370  return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1371  case ARM::VSTRD:
1372  return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1373  case ARM::t2LDRi8:
1374  case ARM::t2LDRi12:
1375  return ARM::t2LDR_POST;
1376  case ARM::t2STRi8:
1377  case ARM::t2STRi12:
1378  return ARM::t2STR_POST;
1379  default: llvm_unreachable("Unhandled opcode!");
1380  }
1381 }
1382 
1383 /// Fold preceding/trailing inc/dec of base register into the
1384 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
///
/// \returns true if \p MI and the neighbouring add/sub were replaced by a
/// single base-updating instruction (both originals are erased), false if
/// nothing was changed.
1385 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1386  // Thumb1 doesn't have updating LDR/STR.
1387  // FIXME: Use LDM/STM with single register instead.
1388  if (isThumb1) return false;
1389 
1390  Register Base = getLoadStoreBaseOp(*MI).getReg();
1391  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1392  unsigned Opcode = MI->getOpcode();
1393  DebugLoc DL = MI->getDebugLoc();
1394  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1395  Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1396  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only accesses with a zero offset are candidates.
1397  if (isi32Load(Opcode) || isi32Store(Opcode))
1398  if (MI->getOperand(2).getImm() != 0)
1399  return false;
1400  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1401  return false;
1402 
1403  // Can't do the merge if the destination register is the same as the would-be
1404  // writeback register.
1405  if (MI->getOperand(0).getReg() == Base)
1406  return false;
1407 
1408  unsigned PredReg = 0;
1409  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1410  int Bytes = getLSMultipleTransferSize(MI);
1411  MachineBasicBlock &MBB = *MI->getParent();
1412  MachineBasicBlock::iterator MBBI(MI);
1413  int Offset;
1414  MachineBasicBlock::iterator MergeInstr
1415  = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1416  unsigned NewOpc;
// For VLDR/VSTR (isAM5) only a preceding decrement or a trailing increment is
// accepted; these select the DB_UPD and IA_UPD forms respectively. The other
// opcodes accept either direction both before and after.
1417  if (!isAM5 && Offset == Bytes) {
1418  NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1419  } else if (Offset == -Bytes) {
1420  NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1421  } else {
1422  MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1423  if (Offset == Bytes) {
1424  NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1425  } else if (!isAM5 && Offset == -Bytes) {
1426  NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1427  } else
1428  return false;
1429  }
// Every path reaching this point matched an add/sub with a non-zero Offset, so
// MergeInstr refers to a real instruction here.
1430  MBB.erase(MergeInstr);
1431 
1432  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
1433 
1434  bool isLd = isLoadSingle(Opcode);
1435  if (isAM5) {
1436  // VLDM[SD]_UPD, VSTM[SD]_UPD
1437  // (There are no base-updating versions of VLDR/VSTR instructions, but the
1438  // updating load/store-multiple instructions can be used with only one
1439  // register.)
1440  MachineOperand &MO = MI->getOperand(0);
1441  BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1442  .addReg(Base, getDefRegState(true)) // WB base register
1443  .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1444  .addImm(Pred).addReg(PredReg)
1445  .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1446  getKillRegState(MO.isKill())))
1447  .cloneMemRefs(*MI);
1448  } else if (isLd) {
1449  if (isAM2) {
1450  // LDR_PRE, LDR_POST
// NOTE(review): none of the opcode-selection helpers visible in this file
// section return ARM::LDRB_PRE_IMM; the second comparison looks unreachable
// from here - confirm.
1451  if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1452  BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1453  .addReg(Base, RegState::Define)
1454  .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg)
1455  .cloneMemRefs(*MI);
1456  } else {
// The post-indexed form gets a zero offset register plus an AM2-encoded
// immediate built from the add/sub direction and the transfer size.
1457  int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1458  BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1459  .addReg(Base, RegState::Define)
1460  .addReg(Base)
1461  .addReg(0)
1462  .addImm(Imm)
1463  .add(predOps(Pred, PredReg))
1464  .cloneMemRefs(*MI);
1465  }
1466  } else {
1467  // t2LDR_PRE, t2LDR_POST
1468  BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1469  .addReg(Base, RegState::Define)
1470  .addReg(Base)
1471  .addImm(Offset)
1472  .add(predOps(Pred, PredReg))
1473  .cloneMemRefs(*MI);
1474  }
1475  } else {
1476  MachineOperand &MO = MI->getOperand(0);
1477  // FIXME: post-indexed stores use am2offset_imm, which still encodes
1478  // the vestigial zero-reg offset register. When that's fixed, this clause
1479  // can be removed entirely.
1480  if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1481  int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1482  // STR_PRE, STR_POST
1483  BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1484  .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1485  .addReg(Base)
1486  .addReg(0)
1487  .addImm(Imm)
1488  .add(predOps(Pred, PredReg))
1489  .cloneMemRefs(*MI);
1490  } else {
1491  // t2STR_PRE, t2STR_POST
1492  BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1493  .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1494  .addReg(Base)
1495  .addImm(Offset)
1496  .add(predOps(Pred, PredReg))
1497  .cloneMemRefs(*MI);
1498  }
1499  }
// The replacement was inserted before MBBI; remove the original load/store.
1500  MBB.erase(MBBI);
1501 
1502  return true;
1503 }
1504 
/// Fold an add/sub of the base register by +8 or -8 that directly precedes or
/// follows the t2LDRDi8/t2STRDi8 instruction \p MI into a pre- or post-indexed
/// t2LDRD/t2STRD.
///
/// \returns true if \p MI and the add/sub were replaced (both are erased),
/// false if nothing was changed.
1505 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1506  unsigned Opcode = MI.getOpcode();
1507  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1508  "Must have t2STRDi8 or t2LDRDi8");
// Only accesses with a zero immediate (operand 3) are candidates.
1509  if (MI.getOperand(3).getImm() != 0)
1510  return false;
1511 
1512  // Behaviour for writeback is undefined if base register is the same as one
1513  // of the others.
1514  const MachineOperand &BaseOp = MI.getOperand(2);
1515  Register Base = BaseOp.getReg();
1516  const MachineOperand &Reg0Op = MI.getOperand(0);
1517  const MachineOperand &Reg1Op = MI.getOperand(1);
1518  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1519  return false;
1520 
1521  unsigned PredReg;
1522  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1523  MachineBasicBlock::iterator MBBI(MI);
1524  MachineBasicBlock &MBB = *MI.getParent();
1525  int Offset;
1526  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
1527  PredReg, Offset);
1528  unsigned NewOpc;
// An update before the access selects the _PRE form, one after it the _POST
// form; the sign of Offset is passed on as the immediate below.
1529  if (Offset == 8 || Offset == -8) {
1530  NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1531  } else {
1532  MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1533  if (Offset == 8 || Offset == -8) {
1534  NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1535  } else
1536  return false;
1537  }
1538  MBB.erase(MergeInstr);
1539 
1540  DebugLoc DL = MI.getDebugLoc();
1541  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// Operand order differs between the two forms: loads list the transferred
// registers before the written-back base, stores list the base first.
1542  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1543  MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
1544  } else {
1545  assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1546  MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
1547  }
1548  MIB.addReg(BaseOp.getReg(), RegState::Kill)
1549  .addImm(Offset).addImm(Pred).addReg(PredReg);
1550  assert(TII->get(Opcode).getNumOperands() == 6 &&
1551  TII->get(NewOpc).getNumOperands() == 7 &&
1552  "Unexpected number of operands in Opcode specification.");
1553 
1554  // Transfer implicit operands.
1555  for (const MachineOperand &MO : MI.implicit_operands())
1556  MIB.add(MO);
1557  MIB.cloneMemRefs(MI);
1558 
// The replacement was inserted before MBBI; remove the original instruction.
1559  MBB.erase(MBBI);
1560  return true;
1561 }
1562 
1563 /// Returns true if instruction is a memory operation that this pass is capable
1564 /// of operating on.
1565 static bool isMemoryOp(const MachineInstr &MI) {
1566  unsigned Opcode = MI.getOpcode();
1567  switch (Opcode) {
1568  case ARM::VLDRS:
1569  case ARM::VSTRS:
1570  case ARM::VLDRD:
1571  case ARM::VSTRD:
1572  case ARM::LDRi12:
1573  case ARM::STRi12:
1574  case ARM::tLDRi:
1575  case ARM::tSTRi:
1576  case ARM::tLDRspi:
1577  case ARM::tSTRspi:
1578  case ARM::t2LDRi8:
1579  case ARM::t2LDRi12:
1580  case ARM::t2STRi8:
1581  case ARM::t2STRi12:
1582  break;
1583  default:
1584  return false;
1585  }
1586  if (!MI.getOperand(1).isReg())
1587  return false;
1588 
1589  // When no memory operands are present, conservatively assume unaligned,
1590  // volatile, unfoldable.
1591  if (!MI.hasOneMemOperand())
1592  return false;
1593 
1594  const MachineMemOperand &MMO = **MI.memoperands_begin();
1595 
1596  // Don't touch volatile memory accesses - we may be changing their order.
1597  // TODO: We could allow unordered and monotonic atomics here, but we need to
1598  // make sure the resulting ldm/stm is correctly marked as atomic.
1599  if (MMO.isVolatile() || MMO.isAtomic())
1600  return false;
1601 
1602  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1603  // not.
1604  if (MMO.getAlignment() < 4)
1605  return false;
1606 
1607  // str <undef> could probably be eliminated entirely, but for now we just want
1608  // to avoid making a mess of it.
1609  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1610  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1611  return false;
1612 
1613  // Likewise don't mess with references to undefined addresses.
1614  if (MI.getOperand(1).isUndef())
1615  return false;
1616 
1617  return true;
1618 }
1619 
// Builds a single load (isDef true) or store (isDef false) with opcode NewOpc
// in front of MBBI, transferring Reg at BaseReg + Offset under predicate
// (Pred, PredReg). RegDeadKill is applied as a dead flag for loads and as a
// kill flag for stores; RegUndef is only used for stores. The memory operands
// are cloned from MI.
// NOTE(review): listing line 1620, which starts this signature (return type,
// function name and the MBB parameter), is absent from this extract (numbering
// jumps 1619 -> 1621). Callers in this file refer to this routine as
// InsertLDR_STR - confirm against the upstream file.
1621  MachineBasicBlock::iterator &MBBI, int Offset,
1622  bool isDef, unsigned NewOpc, unsigned Reg,
1623  bool RegDeadKill, bool RegUndef, unsigned BaseReg,
1624  bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
1625  unsigned PredReg, const TargetInstrInfo *TII,
1626  MachineInstr *MI) {
1627  if (isDef) {
1628  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1629  TII->get(NewOpc))
1630  .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1631  .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1632  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1633  // FIXME: This is overly conservative; the new instruction accesses 4
1634  // bytes, not 8.
1635  MIB.cloneMemRefs(*MI);
1636  } else {
1637  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1638  TII->get(NewOpc))
1639  .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1640  .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1641  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1642  // FIXME: This is overly conservative; the new instruction accesses 4
1643  // bytes, not 8.
1644  MIB.cloneMemRefs(*MI);
1645  }
1646 }
1647 
/// Rewrite an LDRD/STRD/t2LDRDi8 at MBBI that either uses a register pair the
/// ARM encoding cannot express or is affected by Cortex-M3 errata 602117.
///
/// The instruction is replaced by a two-register LDM/STM when the second
/// register's number is higher than the first and the offset is zero, and by
/// two single loads/stores otherwise. On success the original instruction is
/// erased and MBBI is set to the iterator returned by the erase.
///
/// \returns true if the instruction was rewritten, false if it was left alone.
1648 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
// NOTE(review): listing line 1649, which declares the MBBI parameter and opens
// the function body, is absent from this extract (numbering jumps
// 1648 -> 1650). Restore it from the upstream file.
1650  MachineInstr *MI = &*MBBI;
1651  unsigned Opcode = MI->getOpcode();
1652  // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
1653  // if we see this opcode.
1654  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1655  return false;
1656 
// Operand layout used here: 0 and 1 are the transferred registers, 2 is the
// base register.
1657  const MachineOperand &BaseOp = MI->getOperand(2);
1658  Register BaseReg = BaseOp.getReg();
1659  Register EvenReg = MI->getOperand(0).getReg();
1660  Register OddReg = MI->getOperand(1).getReg();
1661  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1662  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1663 
1664  // ARM errata 602117: LDRD with base in list may result in incorrect base
1665  // register when interrupted or faulted.
1666  bool Errata602117 = EvenReg == BaseReg &&
1667  (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1668  // ARM LDRD/STRD needs consecutive registers.
1669  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1670  (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1671 
// Nothing to fix if neither problem applies.
1672  if (!Errata602117 && !NonConsecutiveRegs)
1673  return false;
1674 
1675  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1676  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the flag of interest on the transferred registers is "dead", for
// stores it is "kill"; both are carried in the same *DeadKill variables.
1677  bool EvenDeadKill = isLd ?
1678  MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1679  bool EvenUndef = MI->getOperand(0).isUndef();
1680  bool OddDeadKill = isLd ?
1681  MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1682  bool OddUndef = MI->getOperand(1).isUndef();
1683  bool BaseKill = BaseOp.isKill();
1684  bool BaseUndef = BaseOp.isUndef();
1685  assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
1686  "register offset not handled below");
1687  int OffImm = getMemoryOpOffset(*MI);
1688  unsigned PredReg = 0;
1689  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1690 
1691  if (OddRegNum > EvenRegNum && OffImm == 0) {
1692  // Ascending register numbers and no offset. It's safe to change it to a
1693  // ldm or stm.
1694  unsigned NewOpc = (isLd)
1695  ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1696  : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1697  if (isLd) {
1698  BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1699  .addReg(BaseReg, getKillRegState(BaseKill))
1700  .addImm(Pred).addReg(PredReg)
1701  .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1702  .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
1703  .cloneMemRefs(*MI);
1704  ++NumLDRD2LDM;
1705  } else {
1706  BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1707  .addReg(BaseReg, getKillRegState(BaseKill))
1708  .addImm(Pred).addReg(PredReg)
1709  .addReg(EvenReg,
1710  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1711  .addReg(OddReg,
1712  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
1713  .cloneMemRefs(*MI);
1714  ++NumSTRD2STM;
1715  }
1716  } else {
1717  // Split into two instructions.
1718  unsigned NewOpc = (isLd)
1719  ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1720  : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1721  // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1722  // so adjust and use t2LDRi12 here for that.
1723  unsigned NewOpc2 = (isLd)
1724  ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1725  : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1726  // If this is a load, make sure the first load does not clobber the base
1727  // register before the second load reads it.
1728  if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
1729  assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the second-word load first so the base is still intact when it is read;
// only the last emitted instruction may kill the base.
1730  InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1731  false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
1732  InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1733  false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1734  MI);
1735  } else {
1736  if (OddReg == EvenReg && EvenDeadKill) {
1737  // If the two source operands are the same, the kill marker is
1738  // probably on the first one. e.g.
1739  // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
1740  EvenDeadKill = false;
1741  OddDeadKill = true;
1742  }
1743  // Never kill the base register in the first instruction.
1744  if (EvenReg == BaseReg)
1745  EvenDeadKill = false;
1746  InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1747  EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
1748  MI);
1749  InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1750  OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1751  MI);
1752  }
1753  if (isLd)
1754  ++NumLDRD2LDR;
1755  else
1756  ++NumSTRD2STR;
1757  }
1758 
// Remove the original instruction and continue from the iterator erase()
// returns.
1759  MBBI = MBB.erase(MBBI);
1760  return true;
1761 }
1762 
1763 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1764 /// incrementing offset into LDM / STM ops.
1765 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1766  MemOpQueue MemOps;
1767  unsigned CurrBase = 0;
1768  unsigned CurrOpc = ~0u;
1769  ARMCC::CondCodes CurrPred = ARMCC::AL;
1770  unsigned Position = 0;
1771  assert(Candidates.size() == 0);
1772  assert(MergeBaseCandidates.size() == 0);
1773  LiveRegsValid = false;
1774 
1775  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1776  I = MBBI) {
1777  // The instruction in front of the iterator is the one we look at.
1778  MBBI = std::prev(I);
1779  if (FixInvalidRegPairOp(MBB, MBBI))
1780  continue;
1781  ++Position;
1782 
1783  if (isMemoryOp(*MBBI)) {
1784  unsigned Opcode = MBBI->getOpcode();
1785  const MachineOperand &MO = MBBI->getOperand(0);
1786  Register Reg = MO.getReg();
1787  Register Base = getLoadStoreBaseOp(*MBBI).getReg();
1788  unsigned PredReg = 0;
1789  ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
1790  int Offset = getMemoryOpOffset(*MBBI);
1791  if (CurrBase == 0) {
1792  // Start of a new chain.
1793  CurrBase = Base;
1794  CurrOpc = Opcode;
1795  CurrPred = Pred;
1796  MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1797  continue;
1798  }
1799  // Note: No need to match PredReg in the next if.
1800  if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1801  // Watch out for:
1802  // r4 := ldr [r0, #8]
1803  // r4 := ldr [r0, #4]
1804  // or
1805  // r0 := ldr [r0]
1806  // If a load overrides the base register or a register loaded by
1807  // another load in our chain, we cannot take this instruction.
1808  bool Overlap = false;
1809  if (isLoadSingle(Opcode)) {
1810  Overlap = (Base == Reg);
1811  if (!Overlap) {
1812  for (const MemOpQueueEntry &E : MemOps) {
1813  if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1814  Overlap = true;
1815  break;
1816  }
1817  }
1818  }
1819  }
1820 
1821  if (!Overlap) {
1822  // Check offset and sort memory operation into the current chain.
1823  if (Offset > MemOps.back().Offset) {
1824  MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1825  continue;
1826  } else {
1827  MemOpQueue::iterator MI, ME;
1828  for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1829  if (Offset < MI->Offset) {
1830  // Found a place to insert.
1831  break;
1832  }
1833  if (Offset == MI->Offset) {
1834  // Collision, abort.
1835  MI = ME;
1836  break;
1837  }
1838  }
1839  if (MI != MemOps.end()) {
1840  MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1841  continue;
1842  }
1843  }
1844  }
1845  }
1846 
1847  // Don't advance the iterator; The op will start a new chain next.
1848  MBBI = I;
1849  --Position;
1850  // Fallthrough to look into existing chain.
1851  } else if (MBBI->isDebugInstr()) {
1852  continue;
1853  } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1854  MBBI->getOpcode() == ARM::t2STRDi8) {
1855  // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1856  // remember them because we may still be able to merge add/sub into them.
1857  MergeBaseCandidates.push_back(&*MBBI);
1858  }
1859 
1860  // If we are here then the chain is broken; Extract candidates for a merge.
1861  if (MemOps.size() > 0) {
1862  FormCandidates(MemOps);
1863  // Reset for the next chain.
1864  CurrBase = 0;
1865  CurrOpc = ~0u;
1866  CurrPred = ARMCC::AL;
1867  MemOps.clear();
1868  }
1869  }
1870  if (MemOps.size() > 0)
1871  FormCandidates(MemOps);
1872 
1873  // Sort candidates so they get processed from end to begin of the basic
1874  // block later; This is necessary for liveness calculation.
1875  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1876  return M0->InsertPos < M1->InsertPos;
1877  };
1878  llvm::sort(Candidates, LessThan);
1879 
1880  // Go through list of candidates and merge.
1881  bool Changed = false;
1882  for (const MergeCandidate *Candidate : Candidates) {
1883  if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1884  MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1885  // Merge preceding/trailing base inc/dec into the merged op.
1886  if (Merged) {
1887  Changed = true;
1888  unsigned Opcode = Merged->getOpcode();
1889  if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
1890  MergeBaseUpdateLSDouble(*Merged);
1891  else
1892  MergeBaseUpdateLSMultiple(Merged);
1893  } else {
1894  for (MachineInstr *MI : Candidate->Instrs) {
1895  if (MergeBaseUpdateLoadStore(MI))
1896  Changed = true;
1897  }
1898  }
1899  } else {
1900  assert(Candidate->Instrs.size() == 1);
1901  if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1902  Changed = true;
1903  }
1904  }
1905  Candidates.clear();
1906  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
1907  for (MachineInstr *MI : MergeBaseCandidates)
1908  MergeBaseUpdateLSDouble(*MI);
1909  MergeBaseCandidates.clear();
1910 
1911  return Changed;
1912 }
1913 
1914 /// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1915 /// into the preceding stack restore so it directly restores the value of LR
1916 /// into pc.
1917 /// ldmfd sp!, {..., lr}
1918 /// bx lr
1919 /// or
1920 /// ldmfd sp!, {..., lr}
1921 /// mov pc, lr
1922 /// =>
1923 /// ldmfd sp!, {..., pc}
/// \returns true if the return instruction was folded into the preceding
/// load-multiple, false if the block was left untouched.
1924 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1925  // Thumb1 LDM doesn't allow high registers.
1926  if (isThumb1) return false;
1927  if (MBB.empty()) return false;
1928 
 // NOTE(review): the declaration/initialization of MBBI is not visible in
 // this listing; presumably it points at the block's return instruction.
 // Confirm against the full source.
1930  if (MBBI != MBB.begin() && MBBI != MBB.end() &&
1931  (MBBI->getOpcode() == ARM::BX_RET ||
1932  MBBI->getOpcode() == ARM::tBX_RET ||
1933  MBBI->getOpcode() == ARM::MOVPCLR)) {
1934  MachineBasicBlock::iterator PrevI = std::prev(MBBI);
1935  // Ignore any debug instructions.
1936  while (PrevI->isDebugInstr() && PrevI != MBB.begin())
1937  --PrevI;
1938  MachineInstr &PrevMI = *PrevI;
1939  unsigned Opcode = PrevMI.getOpcode();
 // Only a writeback load-multiple can be turned into a load-return.
1940  if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1941  Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1942  Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
 // The last operand must be LR, otherwise there is nothing to redirect
 // into PC.
1943  MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
1944  if (MO.getReg() != ARM::LR)
1945  return false;
1946  unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
 // Only the increment-after forms have a *_RET counterpart; the other
 // opcodes accepted above are only rejected by this assert.
1947  assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1948  Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
 // Rewrite the load in place: new opcode, LR -> PC, and take over the
 // implicit operands of the return before deleting it.
1949  PrevMI.setDesc(TII->get(NewOpc));
1950  MO.setReg(ARM::PC);
1951  PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
1952  MBB.erase(MBBI);
1953  // We now restore LR into PC so it is not live-out of the return block
1954  // anymore: Clear the CSI Restored bit.
1955  MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
1956  // CSI should be fixed after PrologEpilog Insertion
1957  assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
1958  for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
1959  if (Info.getReg() == ARM::LR) {
1960  Info.setRestored(false);
1961  break;
1962  }
1963  }
1964  return true;
1965  }
1966  }
1967  return false;
1968 }
1969 
/// Replace a tMOVr that defines LR, directly followed by tBX_RET, with a
/// single tBX through the register the move kills. Both original instructions
/// are erased.
/// \returns true if the pair was replaced, false if the pattern is absent.
1970 bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
 // NOTE(review): the declaration/initialization of MBBI is not visible in
 // this listing; confirm against the full source.
1972  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
1973  MBBI->getOpcode() != ARM::tBX_RET)
1974  return false;
1975 
 // Look at the instruction directly in front of the return.
1976  MachineBasicBlock::iterator Prev = MBBI;
1977  --Prev;
1978  if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
1979  return false;
1980 
 // Branch through the first use operand of the move that is marked kill.
1981  for (auto Use : Prev->uses())
1982  if (Use.isKill()) {
1983  assert(STI->hasV4TOps());
1984  BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
1985  .addReg(Use.getReg(), RegState::Kill)
1986  .add(predOps(ARMCC::AL))
1987  .copyImplicitOps(*MBBI);
1988  MBB.erase(MBBI);
1989  MBB.erase(Prev);
1990  return true;
1991  }
1992 
 // Reaching this point means no use operand of the move carried a kill flag.
1993  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
1994 }
1995 
1996 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1997  if (skipFunction(Fn.getFunction()))
1998  return false;
1999 
2000  MF = &Fn;
2001  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
2002  TL = STI->getTargetLowering();
2003  AFI = Fn.getInfo<ARMFunctionInfo>();
2004  TII = STI->getInstrInfo();
2005  TRI = STI->getRegisterInfo();
2006 
2007  RegClassInfoValid = false;
2008  isThumb2 = AFI->isThumb2Function();
2009  isThumb1 = AFI->isThumbFunction() && !isThumb2;
2010 
2011  bool Modified = false;
2012  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
2013  ++MFI) {
2014  MachineBasicBlock &MBB = *MFI;
2015  Modified |= LoadStoreMultipleOpti(MBB);
2016  if (STI->hasV5TOps())
2017  Modified |= MergeReturnIntoLDM(MBB);
2018  if (isThumb1)
2019  Modified |= CombineMovBx(MBB);
2020  }
2021 
2022  Allocator.DestroyAll();
2023  return Modified;
2024 }
2025 
// Human-readable description of the pre-RA pass; passed to INITIALIZE_PASS
// for "arm-prera-ldst-opt".
2026 #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2027  "ARM pre- register allocation load / store optimization pass"
2028 
2029 namespace {
2030 
2031  /// Pre-register-allocation pass that moves loads / stores of consecutive
2032  /// locations close together to make it more likely they will be combined later.
 /// NOTE(review): several lines of this struct (a member declaration, the
 /// bodies of getPassName/getAnalysisUsage and two RescheduleOps parameters)
 /// are absent from this listing; confirm against the full source.
2033  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
2034  static char ID;
2035 
 // Per-function state; the pointers below are (re)assigned at the start of
 // runOnMachineFunction.
2036  AliasAnalysis *AA;
2037  const DataLayout *TD;
2038  const TargetInstrInfo *TII;
2039  const TargetRegisterInfo *TRI;
2040  const ARMSubtarget *STI;
2042  MachineFunction *MF;
2043 
2044  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
2045 
2046  bool runOnMachineFunction(MachineFunction &Fn) override;
2047 
2048  StringRef getPassName() const override {
2050  }
2051 
2052  void getAnalysisUsage(AnalysisUsage &AU) const override {
2055  }
2056 
2057  private:
 /// Decide whether Op0/Op1 can be replaced by one LDRD/STRD and, if so,
 /// fill in the opcode, registers, encoded offset and predicate to use.
2058  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
2059  unsigned &NewOpc, unsigned &EvenReg,
2060  unsigned &OddReg, unsigned &BaseReg,
2061  int &Offset,
2062  unsigned &PredReg, ARMCC::CondCodes &Pred,
2063  bool &isT2);
 /// Try to move the loads (isLd) or stores that use register Base next to
 /// each other.
2064  bool RescheduleOps(MachineBasicBlock *MBB,
2066  unsigned Base, bool isLd,
 /// Collect the loads / stores of one basic block per base register and
 /// reschedule them.
2068  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
2069  };
2070 
2071 } // end anonymous namespace
2072 
2074 
// Register the pre-RA pass under the command-line name "arm-prera-ldst-opt".
2075 INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
2076  ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
2077 
2078 // Limit the number of instructions to be rescheduled.
2079 // FIXME: tune this limit, and/or come up with some better heuristics.
// Hidden option, default 8; RescheduleOps stops growing a group of loads /
// stores once it contains this many instructions.
2080 static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
2081  cl::init(8), cl::Hidden);
2082 
2083 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2084  if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
2085  return false;
2086 
2087  TD = &Fn.getDataLayout();
2088  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
2089  TII = STI->getInstrInfo();
2090  TRI = STI->getRegisterInfo();
2091  MRI = &Fn.getRegInfo();
2092  MF = &Fn;
2093  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2094 
2095  bool Modified = false;
2096  for (MachineBasicBlock &MFI : Fn)
2097  Modified |= RescheduleLoadStoreInstrs(&MFI);
2098 
2099  return Modified;
2100 }
2101 
/// Check whether the memory ops in MemOps can be moved next to each other.
/// Every instruction strictly after I and before E is inspected; the move is
/// rejected if one of them is a call, a terminator, has unmodeled side
/// effects, may alias one of the ops being moved, or defines a register
/// overlapping Base. Registers touched in between that are neither Base nor
/// in MemRegs are counted as added register pressure.
/// NOTE(review): the parameter lines declaring I, E and MemOps are absent
/// from this listing; confirm against the full source.
/// \returns true if the move is considered safe and profitable.
2102 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
2106  SmallSet<unsigned, 4> &MemRegs,
2107  const TargetRegisterInfo *TRI,
2108  AliasAnalysis *AA) {
2109  // Are there stores / loads / calls between them?
2110  SmallSet<unsigned, 4> AddedRegPressure;
2111  while (++I != E) {
 // Debug instructions and the ops being moved themselves are ignored.
2112  if (I->isDebugInstr() || MemOps.count(&*I))
2113  continue;
2114  if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2115  return false;
 // A store in between always needs an alias check; a load in between only
 // matters when the ops being moved are stores.
2116  if (I->mayStore() || (!isLd && I->mayLoad()))
2117  for (MachineInstr *MemOp : MemOps)
2118  if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
2119  return false;
2120  for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2121  MachineOperand &MO = I->getOperand(j);
2122  if (!MO.isReg())
2123  continue;
2124  Register Reg = MO.getReg();
 // The base register must not be redefined between the ops.
2125  if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2126  return false;
2127  if (Reg != Base && !MemRegs.count(Reg))
2128  AddedRegPressure.insert(Reg);
2129  }
2130  }
2131 
2132  // Estimate register pressure increase due to the transformation.
2133  if (MemRegs.size() <= 4)
2134  // Ok if we are moving small number of instructions.
2135  return true;
 // Otherwise allow at most two unrelated registers per moved register.
2136  return AddedRegPressure.size() <= MemRegs.size() * 2;
2137 }
2138 
2139 bool
2140 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
2141  DebugLoc &dl, unsigned &NewOpc,
2142  unsigned &FirstReg,
2143  unsigned &SecondReg,
2144  unsigned &BaseReg, int &Offset,
2145  unsigned &PredReg,
2146  ARMCC::CondCodes &Pred,
2147  bool &isT2) {
2148  // Make sure we're allowed to generate LDRD/STRD.
2149  if (!STI->hasV5TEOps())
2150  return false;
2151 
2152  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
2153  unsigned Scale = 1;
2154  unsigned Opcode = Op0->getOpcode();
2155  if (Opcode == ARM::LDRi12) {
2156  NewOpc = ARM::LDRD;
2157  } else if (Opcode == ARM::STRi12) {
2158  NewOpc = ARM::STRD;
2159  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2160  NewOpc = ARM::t2LDRDi8;
2161  Scale = 4;
2162  isT2 = true;
2163  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2164  NewOpc = ARM::t2STRDi8;
2165  Scale = 4;
2166  isT2 = true;
2167  } else {
2168  return false;
2169  }
2170 
2171  // Make sure the base address satisfies i64 ld / st alignment requirement.
2172  // At the moment, we ignore the memoryoperand's value.
2173  // If we want to use AliasAnalysis, we should check it accordingly.
2174  if (!Op0->hasOneMemOperand() ||
2175  (*Op0->memoperands_begin())->isVolatile() ||
2176  (*Op0->memoperands_begin())->isAtomic())
2177  return false;
2178 
2179  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
2180  const Function &Func = MF->getFunction();
2181  unsigned ReqAlign = STI->hasV6Ops()
2182  ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext()))
2183  : 8; // Pre-v6 need 8-byte align
2184  if (Align < ReqAlign)
2185  return false;
2186 
2187  // Then make sure the immediate offset fits.
2188  int OffImm = getMemoryOpOffset(*Op0);
2189  if (isT2) {
2190  int Limit = (1 << 8) * Scale;
2191  if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2192  return false;
2193  Offset = OffImm;
2194  } else {
2195  ARM_AM::AddrOpc AddSub = ARM_AM::add;
2196  if (OffImm < 0) {
2197  AddSub = ARM_AM::sub;
2198  OffImm = - OffImm;
2199  }
2200  int Limit = (1 << 8) * Scale;
2201  if (OffImm >= Limit || (OffImm & (Scale-1)))
2202  return false;
2203  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2204  }
2205  FirstReg = Op0->getOperand(0).getReg();
2206  SecondReg = Op1->getOperand(0).getReg();
2207  if (FirstReg == SecondReg)
2208  return false;
2209  BaseReg = Op0->getOperand(1).getReg();
2210  Pred = getInstrPredicate(*Op0, PredReg);
2211  dl = Op0->getDebugLoc();
2212  return true;
2213 }
2214 
/// Try to move the loads (isLd) or stores in Ops, which all use register
/// Base, next to each other. Ops is sorted by descending offset and consumed
/// from its back, i.e. lowest offset first. Each round picks a run of ops with
/// the same LSMOpcode, equal transfer size and contiguous offsets (at most
/// InstReorderLimit of them). If moving is safe and profitable, a pair that
/// CanFormLdStDWord accepts is replaced by one LDRD/STRD; otherwise the ops
/// are spliced to a common insertion point.
/// MI2LocMap supplies the position number of each instruction.
/// NOTE(review): the line declaring the Ops parameter, the initializer of
/// LSMOpcode and the declaration of MemOps are absent from this listing;
/// confirm against the full source.
/// \returns true if any instruction was moved or replaced.
2215 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2217  unsigned Base, bool isLd,
2218  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2219  bool RetVal = false;
2220 
2221  // Sort by offset (in reverse order).
2222  llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
2223  int LOffset = getMemoryOpOffset(*LHS);
2224  int ROffset = getMemoryOpOffset(*RHS);
2225  assert(LHS == RHS || LOffset != ROffset);
2226  return LOffset > ROffset;
2227  });
2228 
2229  // The loads / stores of the same base are in order. Scan them from first to
2230  // last and check for the following:
2231  // 1. Any def of base.
2232  // 2. Any gaps.
2233  while (Ops.size() > 1) {
 // FirstOp/LastOp track the ops with the smallest/largest position number
 // within the current run.
2234  unsigned FirstLoc = ~0U;
2235  unsigned LastLoc = 0;
2236  MachineInstr *FirstOp = nullptr;
2237  MachineInstr *LastOp = nullptr;
2238  int LastOffset = 0;
2239  unsigned LastOpcode = 0;
2240  unsigned LastBytes = 0;
2241  unsigned NumMove = 0;
2242  for (int i = Ops.size() - 1; i >= 0; --i) {
2243  // Make sure each operation has the same kind.
2244  MachineInstr *Op = Ops[i];
 // NOTE(review): the initializer of LSMOpcode is missing from this listing.
2245  unsigned LSMOpcode
2247  if (LastOpcode && LSMOpcode != LastOpcode)
2248  break;
2249 
2250  // Check that we have a continuous set of offsets.
2251  int Offset = getMemoryOpOffset(*Op);
2252  unsigned Bytes = getLSMultipleTransferSize(Op);
2253  if (LastBytes) {
2254  if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2255  break;
2256  }
2257 
2258  // Don't try to reschedule too many instructions.
2259  if (NumMove == InstReorderLimit)
2260  break;
2261 
2262  // Found a mergable instruction; save information about it.
2263  ++NumMove;
2264  LastOffset = Offset;
2265  LastBytes = Bytes;
2266  LastOpcode = LSMOpcode;
2267 
2268  unsigned Loc = MI2LocMap[Op];
2269  if (Loc <= FirstLoc) {
2270  FirstLoc = Loc;
2271  FirstOp = Op;
2272  }
2273  if (Loc >= LastLoc) {
2274  LastLoc = Loc;
2275  LastOp = Op;
2276  }
2277  }
2278 
 // A run of one op cannot be combined with anything; drop it and retry
 // with the next-lowest offset.
2279  if (NumMove <= 1)
2280  Ops.pop_back();
2281  else {
 // Collect the ops of the run and the registers they transfer.
2283  SmallSet<unsigned, 4> MemRegs;
2284  for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2285  MemOps.insert(Ops[i]);
2286  MemRegs.insert(Ops[i]->getOperand(0).getReg());
2287  }
2288 
2289  // Be conservative, if the instructions are too far apart, don't
2290  // move them. We want to limit the increase of register pressure.
2291  bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2292  if (DoMove)
2293  DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2294  MemOps, MemRegs, TRI, AA);
2295  if (!DoMove) {
 // Give up on this run and discard all of its ops.
2296  for (unsigned i = 0; i != NumMove; ++i)
2297  Ops.pop_back();
2298  } else {
2299  // This is the new location for the loads / stores.
 // Loads are gathered at the earliest op of the run, stores at the latest;
 // ops of the run and debug instructions at that point are skipped.
2300  MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2301  while (InsertPos != MBB->end() &&
2302  (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2303  ++InsertPos;
2304 
2305  // If we are moving a pair of loads / stores, see if it makes sense
2306  // to try to allocate a pair of registers that can form register pairs.
2307  MachineInstr *Op0 = Ops.back();
2308  MachineInstr *Op1 = Ops[Ops.size()-2];
2309  unsigned FirstReg = 0, SecondReg = 0;
2310  unsigned BaseReg = 0, PredReg = 0;
2311  ARMCC::CondCodes Pred = ARMCC::AL;
2312  bool isT2 = false;
2313  unsigned NewOpc = 0;
2314  int Offset = 0;
2315  DebugLoc dl;
2316  if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2317  FirstReg, SecondReg, BaseReg,
2318  Offset, PredReg, Pred, isT2)) {
2319  Ops.pop_back();
2320  Ops.pop_back();
2321 
 // Restrict both virtual registers to the class required by operand 0
 // of the new instruction.
2322  const MCInstrDesc &MCID = TII->get(NewOpc);
2323  const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2324  MRI->constrainRegClass(FirstReg, TRC);
2325  MRI->constrainRegClass(SecondReg, TRC);
2326 
2327  // Form the pair instruction.
2328  if (isLd) {
2329  MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2330  .addReg(FirstReg, RegState::Define)
2331  .addReg(SecondReg, RegState::Define)
2332  .addReg(BaseReg);
2333  // FIXME: We're converting from LDRi12 to an insn that still
2334  // uses addrmode2, so we need an explicit offset reg. It should
2335  // always be reg0 since we're transforming LDRi12s.
2336  if (!isT2)
2337  MIB.addReg(0);
2338  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2339  MIB.cloneMergedMemRefs({Op0, Op1});
2340  LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2341  ++NumLDRDFormed;
2342  } else {
2343  MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2344  .addReg(FirstReg)
2345  .addReg(SecondReg)
2346  .addReg(BaseReg);
2347  // FIXME: We're converting from STRi12 to an insn that still
2348  // uses addrmode2, so we need an explicit offset reg. It should
2349  // always be reg0 since we're transforming STRi12s.
2350  if (!isT2)
2351  MIB.addReg(0);
2352  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2353  MIB.cloneMergedMemRefs({Op0, Op1});
2354  LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2355  ++NumSTRDFormed;
2356  }
 // The two single accesses are now covered by the pair instruction.
2357  MBB->erase(Op0);
2358  MBB->erase(Op1);
2359 
2360  if (!isT2) {
2361  // Add register allocation hints to form register pairs.
2362  MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2363  MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
2364  }
2365  } else {
 // No pair instruction possible: just move the ops of the run to the
 // insertion point, lowest offset first.
2366  for (unsigned i = 0; i != NumMove; ++i) {
2367  MachineInstr *Op = Ops.back();
2368  Ops.pop_back();
2369  MBB->splice(InsertPos, MBB, Op);
2370  }
2371  }
2372 
2373  NumLdStMoved += NumMove;
2374  RetVal = true;
2375  }
2376  }
2377  }
2378 
2379  return RetVal;
2380 }
/// Walk MBB region by region and reschedule the loads / stores of each region.
/// A region ends at a call or terminator, or at a memory op whose base+offset
/// was already seen in the region. Within a region, unpredicated memory ops are
/// grouped per base register (loads and stores separately), and every group
/// with more than one op is handed to RescheduleOps.
/// NOTE(review): the declarations of MI2LocMap, Base2InstMap and the end
/// iterator E are absent from this listing; confirm against the full source.
/// \returns true if RescheduleOps changed anything.
2382 bool
2383 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2384  bool RetVal = false;
2385 
2387  using MapIt = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator;
2389  using BaseVec = SmallVector<unsigned, 4>;
 // Base register -> memory ops using it; the vectors keep the bases in the
 // order they were first seen.
2390  Base2InstMap Base2LdsMap;
2391  Base2InstMap Base2StsMap;
2392  BaseVec LdBases;
2393  BaseVec StBases;
2394 
 // Running position number assigned to non-debug instructions.
2395  unsigned Loc = 0;
2396  MachineBasicBlock::iterator MBBI = MBB->begin();
2398  while (MBBI != E) {
2399  for (; MBBI != E; ++MBBI) {
2400  MachineInstr &MI = *MBBI;
2401  if (MI.isCall() || MI.isTerminator()) {
2402  // Stop at barriers.
2403  ++MBBI;
2404  break;
2405  }
2406 
2407  if (!MI.isDebugInstr())
2408  MI2LocMap[&MI] = ++Loc;
2409 
2410  if (!isMemoryOp(MI))
2411  continue;
 // Predicated memory ops are not considered.
2412  unsigned PredReg = 0;
2413  if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2414  continue;
2415 
2416  int Opc = MI.getOpcode();
2417  bool isLd = isLoadSingle(Opc);
2418  Register Base = MI.getOperand(1).getReg();
2419  int Offset = getMemoryOpOffset(MI);
2420  bool StopHere = false;
 // Record MI under its base register, or set StopHere if an op with the
 // same base and offset is already recorded.
2421  auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
2422  MapIt BI = Base2Ops.find(Base);
2423  if (BI == Base2Ops.end()) {
2424  Base2Ops[Base].push_back(&MI);
2425  Bases.push_back(Base);
2426  return;
2427  }
2428  for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2429  if (Offset == getMemoryOpOffset(*BI->second[i])) {
2430  StopHere = true;
2431  break;
2432  }
2433  }
2434  if (!StopHere)
2435  BI->second.push_back(&MI);
2436  };
2437 
2438  if (isLd)
2439  FindBases(Base2LdsMap, LdBases);
2440  else
2441  FindBases(Base2StsMap, StBases);
2442 
2443  if (StopHere) {
2444  // Found a duplicate (a base+offset combination that's seen earlier).
2445  // Backtrack.
 // MBBI is not advanced here, so the duplicate is visited again as the
 // first instruction of the next region.
2446  --Loc;
2447  break;
2448  }
2449  }
2450 
2451  // Re-schedule loads.
2452  for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2453  unsigned Base = LdBases[i];
2454  SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2455  if (Lds.size() > 1)
2456  RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2457  }
2458 
2459  // Re-schedule stores.
2460  for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2461  unsigned Base = StBases[i];
2462  SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2463  if (Sts.size() > 1)
2464  RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
2465  }
2466 
 // Start the next region with empty per-base bookkeeping.
2467  if (MBBI != E) {
2468  Base2LdsMap.clear();
2469  Base2StsMap.clear();
2470  LdBases.clear();
2471  StBases.clear();
2472  }
2473  }
2474 
2475  return RetVal;
2476 }
2477 
2478 /// Returns an instance of the load / store optimization pass.
/// When PreAlloc is true the pre-register-allocation variant
/// (ARMPreAllocLoadStoreOpt) is created, otherwise ARMLoadStoreOpt.
/// The returned object is allocated with new.
// NOTE(review): the line carrying the function signature is absent from this
// listing; confirm against the full source.
2480  if (PreAlloc)
2481  return new ARMPreAllocLoadStoreOpt();
2482  return new ARMLoadStoreOpt();
2483 }
uint64_t CallInst * C
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand *> MMOs) const
const MachineInstrBuilder & add(const MachineOperand &MO) const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:112
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
static bool isT1i32Load(unsigned Opc)
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:656
SI Whole Quad Mode
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:509
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before...
static unsigned getImmScale(unsigned Opc)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:384
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:179
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:529
unsigned Reg
unsigned second
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:181
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, ARMCC::CondCodes Pred, unsigned PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher...
AnalysisUsage & addRequired()
static uint32_t getAlignment(const MCSectionCOFF &Sec)
A description of a memory reference used in the backend.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:226
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:413
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr *> &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:672
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
Position
Position to insert a new instruction relative to an existing instruction.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool regsOverlap(Register regA, Register regB) const
Returns true if the two registers are equal or alias each other.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:407
void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
unsigned char getAM3Offset(unsigned AM3Opc)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
iterator getLastNonDebugInstr()
Returns an iterator to the last non-debug instruction in the basic block, or end().
void setReg(Register Reg)
Change the register this operand corresponds to.
unsigned getUndefRegState(bool B)
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
unsigned getKillRegState(bool B)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
TargetInstrInfo - Interface to description of machine instruction set.
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
unsigned getDeadRegState(bool B)
unsigned getDefRegState(bool B)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
MachineInstrBundleIterator< MachineInstr > iterator
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned const MachineRegisterInfo * MRI
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:533
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool isT2i32Load(unsigned Opc)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:187
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
size_type size() const
Definition: SmallSet.h:159
Register is known to be fully dead.
Represent the analysis usage information of a pass.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:566
constexpr double e
Definition: MathExtras.h:57
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition, otherwise returns AL.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
size_t size() const
Definition: SmallVector.h:52
static bool isAtomic(Instruction *I)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on...
bool isDebugInstr() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint64_t getAlignment() const
Return the minimum known alignment in bytes of the actual memory reference.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
unsigned first
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg, ARMCC::CondCodes Pred, unsigned PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static bool isi32Store(unsigned Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Basic Register Allocator
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:551
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
static uint64_t add(uint64_t LeftOp, uint64_t RightOp)
Definition: FileCheck.cpp:215
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly. ...
INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false, false) static bool definesCPSR(const MachineInstr &MI)
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:446
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
The CalleeSavedInfo class tracks the information needed to locate where a callee saved register is in t...
#define ARM_LOAD_STORE_OPT_NAME
unsigned getTransientStackAlignment() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
iterator_range< mop_iterator > implicit_operands()
Definition: MachineInstr.h:490
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
unsigned succ_size() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
MachineFunctionProperties & set(Property P)
TargetSubtargetInfo - Generic base class for all target subtargets.
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
Representation of each machine instruction.
Definition: MachineInstr.h:63
static bool isT2i32Store(unsigned Opc)
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
static bool isValidLSDoubleOffset(int Offset)
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:48
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
#define I(x, y, z)
Definition: MD5.cpp:58
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1228
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
virtual const TargetFrameLowering * getFrameLowering() const
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr *> OtherMIs) const
uint32_t Size
Definition: Profile.cpp:46
static bool isLoadSingle(unsigned Opc)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
bool isReg() const
isReg - Tests if this is a MO_Register operand.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static bool isi32Load(unsigned Opc)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
aarch64 promote const
LLVM Value Representation.
Definition: Value.h:74
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg, ARMCC::CondCodes Pred, unsigned PredReg, int &Offset)
Searches for an increment or decrement of Reg after MBBI.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
AddrOpc getAM5Op(unsigned AM5Opc)
Register getReg() const
getReg - Returns the register number.
static bool isVolatile(Instruction *Inst)
static bool isT1i32Store(unsigned Opc)
static int getMemoryOpOffset(const MachineInstr &MI)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static bool ContainsReg(const ArrayRef< std::pair< unsigned, bool >> &Regs, unsigned Reg)
Properties which a MachineFunction may have at a given point in time.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This file describes how to lower LLVM code to machine code.
bool isImplicit() const
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1224