1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the Base ARM implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMConstantPoolValue.h"
17 #include "ARMFeatures.h"
18 #include "ARMHazardRecognizer.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Triple.h"
43 #include "llvm/IR/Attributes.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DebugLoc.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/GlobalValue.h"
48 #include "llvm/MC/MCAsmInfo.h"
49 #include "llvm/MC/MCInstrDesc.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <iterator>
63 #include <new>
64 #include <utility>
65 #include <vector>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "arm-instrinfo"
70 
71 #define GET_INSTRINFO_CTOR_DTOR
72 #include "ARMGenInstrInfo.inc"
73 
74 static cl::opt<bool>
75 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
76  cl::desc("Enable ARM 2-addr to 3-addr conv"));
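// Illustrative usage note: because the option above is cl::Hidden, the 2-addr
// to 3-addr conversion below stays off unless the flag is passed explicitly,
// e.g. "llc -enable-arm-3-addr-conv <input.ll>".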
77 
78 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
79 struct ARM_MLxEntry {
80  uint16_t MLxOpc; // MLA / MLS opcode
81  uint16_t MulOpc; // Expanded multiplication opcode
82  uint16_t AddSubOpc; // Expanded add / sub opcode
83  bool NegAcc; // True if the acc is negated before the add / sub.
84  bool HasLane; // True if instruction has an extra "lane" operand.
85 };
86 
87 static const ARM_MLxEntry ARM_MLxTable[] = {
88  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
89  // fp scalar ops
90  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
91  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
92  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
93  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
94  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
95  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
96  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
97  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
98 
99  // fp SIMD ops
100  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
101  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
102  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
103  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
104  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
105  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
106  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
107  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
108 };
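// Illustrative reading of the table: the first entry records that VMLAS
// (Sd = Sd + Sn * Sm) may be expanded into a VMULS followed by a VADDS; the
// VNMLx entries set NegAcc because the accumulator is negated before the
// add/sub, and the *sl* entries set HasLane for the extra scalar-lane operand.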
109 
110 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
111  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
112  Subtarget(STI) {
113  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
114  if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
115  llvm_unreachable("Duplicated entries?");
116  MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
117  MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
118  }
119 }
120 
121 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
122 // currently defaults to no prepass hazard recognizer.
123 ScheduleHazardRecognizer *
124 ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
125  const ScheduleDAG *DAG) const {
126  if (usePreRAHazardRecognizer()) {
127  const InstrItineraryData *II =
128  static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
129  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
130  }
131  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
132 }
133 
134 ScheduleHazardRecognizer *ARMBaseInstrInfo::
135 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
136  const ScheduleDAG *DAG) const {
137  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
138  return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
139  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
140 }
141 
142 MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
143  MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
144  // FIXME: Thumb2 support.
145 
146  if (!EnableARM3Addr)
147  return nullptr;
148 
149  MachineFunction &MF = *MI.getParent()->getParent();
150  uint64_t TSFlags = MI.getDesc().TSFlags;
151  bool isPre = false;
152  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
153  default: return nullptr;
154  case ARMII::IndexModePre:
155  isPre = true;
156  break;
157  case ARMII::IndexModePost:
158  break;
159  }
160 
161  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
162  // operation.
163  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
164  if (MemOpc == 0)
165  return nullptr;
166 
167  MachineInstr *UpdateMI = nullptr;
168  MachineInstr *MemMI = nullptr;
169  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
170  const MCInstrDesc &MCID = MI.getDesc();
171  unsigned NumOps = MCID.getNumOperands();
172  bool isLoad = !MI.mayStore();
173  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
174  const MachineOperand &Base = MI.getOperand(2);
175  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
176  unsigned WBReg = WB.getReg();
177  unsigned BaseReg = Base.getReg();
178  unsigned OffReg = Offset.getReg();
179  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
180  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
181  switch (AddrMode) {
182  default: llvm_unreachable("Unknown indexed op!");
183  case ARMII::AddrMode2: {
184  bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
185  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
186  if (OffReg == 0) {
187  if (ARM_AM::getSOImmVal(Amt) == -1)
188  // Can't encode it in a so_imm operand. This transformation will
189  // add more than 1 instruction. Abandon!
190  return nullptr;
191  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
192  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
193  .addReg(BaseReg)
194  .addImm(Amt)
195  .add(predOps(Pred))
196  .add(condCodeOp());
197  } else if (Amt != 0) {
198  ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
199  unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
200  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
201  get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
202  .addReg(BaseReg)
203  .addReg(OffReg)
204  .addReg(0)
205  .addImm(SOOpc)
206  .add(predOps(Pred))
207  .add(condCodeOp());
208  } else
209  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
210  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
211  .addReg(BaseReg)
212  .addReg(OffReg)
213  .add(predOps(Pred))
214  .add(condCodeOp());
215  break;
216  }
217  case ARMII::AddrMode3 : {
218  bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
219  unsigned Amt = ARM_AM::getAM3Offset(OffImm);
220  if (OffReg == 0)
221  // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
222  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
223  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
224  .addReg(BaseReg)
225  .addImm(Amt)
226  .add(predOps(Pred))
227  .add(condCodeOp());
228  else
229  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
230  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
231  .addReg(BaseReg)
232  .addReg(OffReg)
233  .add(predOps(Pred))
234  .add(condCodeOp());
235  break;
236  }
237  }
238 
239  std::vector<MachineInstr*> NewMIs;
240  if (isPre) {
241  if (isLoad)
242  MemMI =
243  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
244  .addReg(WBReg)
245  .addImm(0)
246  .addImm(Pred);
247  else
248  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
249  .addReg(MI.getOperand(1).getReg())
250  .addReg(WBReg)
251  .addReg(0)
252  .addImm(0)
253  .addImm(Pred);
254  NewMIs.push_back(MemMI);
255  NewMIs.push_back(UpdateMI);
256  } else {
257  if (isLoad)
258  MemMI =
259  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
260  .addReg(BaseReg)
261  .addImm(0)
262  .addImm(Pred);
263  else
264  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
265  .addReg(MI.getOperand(1).getReg())
266  .addReg(BaseReg)
267  .addReg(0)
268  .addImm(0)
269  .addImm(Pred);
270  if (WB.isDead())
271  UpdateMI->getOperand(0).setIsDead();
272  NewMIs.push_back(UpdateMI);
273  NewMIs.push_back(MemMI);
274  }
275 
276  // Transfer LiveVariables states, kill / dead info.
277  if (LV) {
278  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
279  MachineOperand &MO = MI.getOperand(i);
280  if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
281  unsigned Reg = MO.getReg();
282 
283  LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284  if (MO.isDef()) {
285  MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
286  if (MO.isDead())
287  LV->addVirtualRegisterDead(Reg, *NewMI);
288  }
289  if (MO.isUse() && MO.isKill()) {
290  for (unsigned j = 0; j < 2; ++j) {
291  // Look at the two new MI's in reverse order.
292  MachineInstr *NewMI = NewMIs[j];
293  if (!NewMI->readsRegister(Reg))
294  continue;
295  LV->addVirtualRegisterKilled(Reg, *NewMI);
296  if (VI.removeKill(MI))
297  VI.Kills.push_back(NewMI);
298  break;
299  }
300  }
301  }
302  }
303  }
304 
305  MachineBasicBlock::iterator MBBI = MI.getIterator();
306  MFI->insert(MBBI, NewMIs[1]);
307  MFI->insert(MBBI, NewMIs[0]);
308  return NewMIs[0];
309 }
310 
311 // Branch analysis.
312 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
313  MachineBasicBlock *&TBB,
314  MachineBasicBlock *&FBB,
315  SmallVectorImpl<MachineOperand> &Cond,
316  bool AllowModify) const {
317  TBB = nullptr;
318  FBB = nullptr;
319 
320  MachineBasicBlock::iterator I = MBB.end();
321  if (I == MBB.begin())
322  return false; // Empty blocks are easy.
323  --I;
324 
325  // Walk backwards from the end of the basic block until the branch is
326  // analyzed or we give up.
327  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
328  // Flag to be raised on unanalyzable instructions. This is useful in cases
329  // where we want to clean up at the end of the basic block before we bail
330  // out.
331  bool CantAnalyze = false;
332 
333  // Skip over DEBUG values and predicated nonterminators.
334  while (I->isDebugInstr() || !I->isTerminator()) {
335  if (I == MBB.begin())
336  return false;
337  --I;
338  }
339 
340  if (isIndirectBranchOpcode(I->getOpcode()) ||
341  isJumpTableBranchOpcode(I->getOpcode())) {
342  // Indirect branches and jump tables can't be analyzed, but we still want
343  // to clean up any instructions at the tail of the basic block.
344  CantAnalyze = true;
345  } else if (isUncondBranchOpcode(I->getOpcode())) {
346  TBB = I->getOperand(0).getMBB();
347  } else if (isCondBranchOpcode(I->getOpcode())) {
348  // Bail out if we encounter multiple conditional branches.
349  if (!Cond.empty())
350  return true;
351 
352  assert(!FBB && "FBB should have been null.");
353  FBB = TBB;
354  TBB = I->getOperand(0).getMBB();
355  Cond.push_back(I->getOperand(1));
356  Cond.push_back(I->getOperand(2));
357  } else if (I->isReturn()) {
358  // Returns can't be analyzed, but we should run cleanup.
359  CantAnalyze = !isPredicated(*I);
360  } else {
361  // We encountered an unrecognized terminator. Bail out immediately.
362  return true;
363  }
364 
365  // Cleanup code - to be run for unpredicated unconditional branches and
366  // returns.
367  if (!isPredicated(*I) &&
368  (isUncondBranchOpcode(I->getOpcode()) ||
369  isIndirectBranchOpcode(I->getOpcode()) ||
370  isJumpTableBranchOpcode(I->getOpcode()) ||
371  I->isReturn())) {
372  // Forget any previous conditional branch information - it no longer applies.
373  Cond.clear();
374  FBB = nullptr;
375 
376  // If we can modify the function, delete everything below this
377  // unconditional branch.
378  if (AllowModify) {
379  MachineBasicBlock::iterator DI = std::next(I);
380  while (DI != MBB.end()) {
381  MachineInstr &InstToDelete = *DI;
382  ++DI;
383  InstToDelete.eraseFromParent();
384  }
385  }
386  }
387 
388  if (CantAnalyze)
389  return true;
390 
391  if (I == MBB.begin())
392  return false;
393 
394  --I;
395  }
396 
397  // We made it past the terminators without bailing out - we must have
398  // analyzed this branch successfully.
399  return false;
400 }
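// Illustrative example of the contract above: for a block ending in
//   Bcc <bb.if>, <eq>, %cpsr
//   B <bb.else>
// analyzeBranch reports TBB = <bb.if>, FBB = <bb.else>, Cond = {EQ, %cpsr},
// while a plain fall-through block leaves TBB/FBB null and returns false.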
401 
402 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
403  int *BytesRemoved) const {
404  assert(!BytesRemoved && "code size not handled");
405 
406  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
407  if (I == MBB.end())
408  return 0;
409 
410  if (!isUncondBranchOpcode(I->getOpcode()) &&
411  !isCondBranchOpcode(I->getOpcode()))
412  return 0;
413 
414  // Remove the branch.
415  I->eraseFromParent();
416 
417  I = MBB.end();
418 
419  if (I == MBB.begin()) return 1;
420  --I;
421  if (!isCondBranchOpcode(I->getOpcode()))
422  return 1;
423 
424  // Remove the branch.
425  I->eraseFromParent();
426  return 2;
427 }
428 
429 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
430  MachineBasicBlock *TBB,
431  MachineBasicBlock *FBB,
432  ArrayRef<MachineOperand> Cond,
433  const DebugLoc &DL,
434  int *BytesAdded) const {
435  assert(!BytesAdded && "code size not handled");
436  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
437  int BOpc = !AFI->isThumbFunction()
438  ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
439  int BccOpc = !AFI->isThumbFunction()
440  ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
441  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
442 
443  // Shouldn't be a fall through.
444  assert(TBB && "insertBranch must not be told to insert a fallthrough");
445  assert((Cond.size() == 2 || Cond.size() == 0) &&
446  "ARM branch conditions have two components!");
447 
448  // For conditional branches, we use addOperand to preserve CPSR flags.
449 
450  if (!FBB) {
451  if (Cond.empty()) { // Unconditional branch?
452  if (isThumb)
453  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
454  else
455  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
456  } else
457  BuildMI(&MBB, DL, get(BccOpc))
458  .addMBB(TBB)
459  .addImm(Cond[0].getImm())
460  .add(Cond[1]);
461  return 1;
462  }
463 
464  // Two-way conditional branch.
465  BuildMI(&MBB, DL, get(BccOpc))
466  .addMBB(TBB)
467  .addImm(Cond[0].getImm())
468  .add(Cond[1]);
469  if (isThumb)
470  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
471  else
472  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
473  return 2;
474 }
475 
476 bool ARMBaseInstrInfo::
477 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
478  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
479  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
480  return false;
481 }
482 
483 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
484  if (MI.isBundle()) {
485  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
486  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
487  while (++I != E && I->isInsideBundle()) {
488  int PIdx = I->findFirstPredOperandIdx();
489  if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
490  return true;
491  }
492  return false;
493  }
494 
495  int PIdx = MI.findFirstPredOperandIdx();
496  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
497 }
498 
499 bool ARMBaseInstrInfo::PredicateInstruction(
500  MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
501  unsigned Opc = MI.getOpcode();
502  if (isUncondBranchOpcode(Opc)) {
503  MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
504  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
505  .addImm(Pred[0].getImm())
506  .addReg(Pred[1].getReg());
507  return true;
508  }
509 
510  int PIdx = MI.findFirstPredOperandIdx();
511  if (PIdx != -1) {
512  MachineOperand &PMO = MI.getOperand(PIdx);
513  PMO.setImm(Pred[0].getImm());
514  MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
515  return true;
516  }
517  return false;
518 }
519 
520 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
521  ArrayRef<MachineOperand> Pred2) const {
522  if (Pred1.size() > 2 || Pred2.size() > 2)
523  return false;
524 
525  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
526  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
527  if (CC1 == CC2)
528  return true;
529 
530  switch (CC1) {
531  default:
532  return false;
533  case ARMCC::AL:
534  return true;
535  case ARMCC::HS:
536  return CC2 == ARMCC::HI;
537  case ARMCC::LS:
538  return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
539  case ARMCC::GE:
540  return CC2 == ARMCC::GT;
541  case ARMCC::LE:
542  return CC2 == ARMCC::LT;
543  }
544 }
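// Illustrative example: SubsumesPredicate({HS, ...}, {HI, ...}) is true
// because unsigned >= covers unsigned >, AL subsumes every condition, and any
// condition trivially subsumes itself.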
545 
546 bool ARMBaseInstrInfo::DefinesPredicate(
547  MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
548  bool Found = false;
549  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
550  const MachineOperand &MO = MI.getOperand(i);
551  if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
552  (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
553  Pred.push_back(MO);
554  Found = true;
555  }
556  }
557 
558  return Found;
559 }
560 
561 bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
562  for (const auto &MO : MI.operands())
563  if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
564  return true;
565  return false;
566 }
567 
568 bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
569  unsigned Op) const {
570  const MachineOperand &Offset = MI.getOperand(Op + 1);
571  return Offset.getReg() != 0;
572 }
573 
574 // Load with negative register offset requires additional 1cyc and +I unit
575 // for Cortex A57
576 bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
577  unsigned Op) const {
578  const MachineOperand &Offset = MI.getOperand(Op + 1);
579  const MachineOperand &Opc = MI.getOperand(Op + 2);
580  assert(Opc.isImm());
581  assert(Offset.isReg());
582  int64_t OpcImm = Opc.getImm();
583 
584  bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
585  return (isSub && Offset.getReg() != 0);
586 }
587 
588 bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
589  unsigned Op) const {
590  const MachineOperand &Opc = MI.getOperand(Op + 2);
591  unsigned OffImm = Opc.getImm();
592  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
593 }
594 
595 // Load, scaled register offset, not plus LSL2
596 bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
597  unsigned Op) const {
598  const MachineOperand &Opc = MI.getOperand(Op + 2);
599  unsigned OffImm = Opc.getImm();
600 
601  bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
602  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
603  ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
604  if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
605  bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
606  return !SimpleScaled;
607 }
608 
609 // Minus reg for ldstso addr mode
610 bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
611  unsigned Op) const {
612  unsigned OffImm = MI.getOperand(Op + 2).getImm();
613  return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
614 }
615 
616 // Load, scaled register offset
617 bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
618  unsigned Op) const {
619  unsigned OffImm = MI.getOperand(Op + 2).getImm();
620  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
621 }
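// Illustrative examples for the Cortex-A57 oriented predicates above (rough
// sketch): "ldr r0, [r1, r2, lsl #2]" is the simple scaled form and is not
// penalized, while "ldr r0, [r1, -r2]" and "ldr r0, [r1, r2, lsl #3]" are the
// minus-reg / other-scaled forms these hooks flag as costing an extra cycle
// and an extra I unit.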
622 
623 static bool isEligibleForITBlock(const MachineInstr *MI) {
624  switch (MI->getOpcode()) {
625  default: return true;
626  case ARM::tADC: // ADC (register) T1
627  case ARM::tADDi3: // ADD (immediate) T1
628  case ARM::tADDi8: // ADD (immediate) T2
629  case ARM::tADDrr: // ADD (register) T1
630  case ARM::tAND: // AND (register) T1
631  case ARM::tASRri: // ASR (immediate) T1
632  case ARM::tASRrr: // ASR (register) T1
633  case ARM::tBIC: // BIC (register) T1
634  case ARM::tEOR: // EOR (register) T1
635  case ARM::tLSLri: // LSL (immediate) T1
636  case ARM::tLSLrr: // LSL (register) T1
637  case ARM::tLSRri: // LSR (immediate) T1
638  case ARM::tLSRrr: // LSR (register) T1
639  case ARM::tMUL: // MUL T1
640  case ARM::tMVN: // MVN (register) T1
641  case ARM::tORR: // ORR (register) T1
642  case ARM::tROR: // ROR (register) T1
643  case ARM::tRSB: // RSB (immediate) T1
644  case ARM::tSBC: // SBC (register) T1
645  case ARM::tSUBi3: // SUB (immediate) T1
646  case ARM::tSUBi8: // SUB (immediate) T2
647  case ARM::tSUBrr: // SUB (register) T1
648  return !ARMBaseInstrInfo::isCPSRDefined(*MI);
649  }
650 }
651 
652 /// isPredicable - Return true if the specified instruction can be predicated.
653 /// By default, this returns true for every instruction with a
654 /// PredicateOperand.
655 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
656  if (!MI.isPredicable())
657  return false;
658 
659  if (MI.isBundle())
660  return false;
661 
662  if (!isEligibleForITBlock(&MI))
663  return false;
664 
665  const ARMFunctionInfo *AFI =
666  MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
667 
668  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
669  // In their ARM encoding, they can't be encoded in a conditional form.
670  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
671  return false;
672 
673  if (AFI->isThumb2Function()) {
674  if (getSubtarget().restrictIT())
675  return isV8EligibleForIT(&MI);
676  }
677 
678  return true;
679 }
680 
681 namespace llvm {
682 
683 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
684  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
685  const MachineOperand &MO = MI->getOperand(i);
686  if (!MO.isReg() || MO.isUndef() || MO.isUse())
687  continue;
688  if (MO.getReg() != ARM::CPSR)
689  continue;
690  if (!MO.isDead())
691  return false;
692  }
693  // all definitions of CPSR are dead
694  return true;
695 }
696 
697 } // end namespace llvm
698 
699 /// GetInstSize - Return the size of the specified MachineInstr.
700 ///
701 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
702  const MachineBasicBlock &MBB = *MI.getParent();
703  const MachineFunction *MF = MBB.getParent();
704  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
705 
706  const MCInstrDesc &MCID = MI.getDesc();
707  if (MCID.getSize())
708  return MCID.getSize();
709 
710  // If this machine instr is an inline asm, measure it.
711  if (MI.getOpcode() == ARM::INLINEASM)
712  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
713  unsigned Opc = MI.getOpcode();
714  switch (Opc) {
715  default:
716  // pseudo-instruction sizes are zero.
717  return 0;
718  case TargetOpcode::BUNDLE:
719  return getInstBundleLength(MI);
720  case ARM::MOVi16_ga_pcrel:
721  case ARM::MOVTi16_ga_pcrel:
722  case ARM::t2MOVi16_ga_pcrel:
723  case ARM::t2MOVTi16_ga_pcrel:
724  return 4;
725  case ARM::MOVi32imm:
726  case ARM::t2MOVi32imm:
727  return 8;
728  case ARM::CONSTPOOL_ENTRY:
729  case ARM::JUMPTABLE_INSTS:
730  case ARM::JUMPTABLE_ADDRS:
731  case ARM::JUMPTABLE_TBB:
732  case ARM::JUMPTABLE_TBH:
733  // If this machine instr is a constant pool entry, its size is recorded as
734  // operand #2.
735  return MI.getOperand(2).getImm();
736  case ARM::Int_eh_sjlj_longjmp:
737  return 16;
738  case ARM::tInt_eh_sjlj_longjmp:
739  return 10;
740  case ARM::tInt_WIN_eh_sjlj_longjmp:
741  return 12;
742  case ARM::Int_eh_sjlj_setjmp:
743  case ARM::Int_eh_sjlj_setjmp_nofp:
744  return 20;
745  case ARM::tInt_eh_sjlj_setjmp:
746  case ARM::t2Int_eh_sjlj_setjmp:
747  case ARM::t2Int_eh_sjlj_setjmp_nofp:
748  return 12;
749  case ARM::SPACE:
750  return MI.getOperand(1).getImm();
751  }
752 }
753 
754 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
755  unsigned Size = 0;
756  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
757  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
758  while (++I != E && I->isInsideBundle()) {
759  assert(!I->isBundle() && "No nested bundle!");
760  Size += getInstSizeInBytes(*I);
761  }
762  return Size;
763 }
764 
765 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
766  MachineBasicBlock::iterator I,
767  unsigned DestReg, bool KillSrc,
768  const ARMSubtarget &Subtarget) const {
769  unsigned Opc = Subtarget.isThumb()
770  ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
771  : ARM::MRS;
772 
773  MachineInstrBuilder MIB =
774  BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
775 
776  // There is only 1 A/R class MRS instruction, and it always refers to
777  // APSR. However, there are lots of other possibilities on M-class cores.
778  if (Subtarget.isMClass())
779  MIB.addImm(0x800);
780 
781  MIB.add(predOps(ARMCC::AL))
782  .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
783 }
784 
785 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
786  MachineBasicBlock::iterator I,
787  unsigned SrcReg, bool KillSrc,
788  const ARMSubtarget &Subtarget) const {
789  unsigned Opc = Subtarget.isThumb()
790  ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
791  : ARM::MSR;
792 
793  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
794 
795  if (Subtarget.isMClass())
796  MIB.addImm(0x800);
797  else
798  MIB.addImm(8);
799 
800  MIB.addReg(SrcReg, getKillRegState(KillSrc))
801  .add(predOps(ARMCC::AL))
802  .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
803 }
804 
805 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
806  MachineBasicBlock::iterator I,
807  const DebugLoc &DL, unsigned DestReg,
808  unsigned SrcReg, bool KillSrc) const {
809  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
810  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
811 
812  if (GPRDest && GPRSrc) {
813  BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
814  .addReg(SrcReg, getKillRegState(KillSrc))
815  .add(predOps(ARMCC::AL))
816  .add(condCodeOp());
817  return;
818  }
819 
820  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
821  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
822 
823  unsigned Opc = 0;
824  if (SPRDest && SPRSrc)
825  Opc = ARM::VMOVS;
826  else if (GPRDest && SPRSrc)
827  Opc = ARM::VMOVRS;
828  else if (SPRDest && GPRSrc)
829  Opc = ARM::VMOVSR;
830  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
831  Opc = ARM::VMOVD;
832  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
833  Opc = ARM::VORRq;
834 
835  if (Opc) {
836  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
837  MIB.addReg(SrcReg, getKillRegState(KillSrc));
838  if (Opc == ARM::VORRq)
839  MIB.addReg(SrcReg, getKillRegState(KillSrc));
840  MIB.add(predOps(ARMCC::AL));
841  return;
842  }
843 
844  // Handle register classes that require multiple instructions.
845  unsigned BeginIdx = 0;
846  unsigned SubRegs = 0;
847  int Spacing = 1;
848 
849  // Use VORRq when possible.
850  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
851  Opc = ARM::VORRq;
852  BeginIdx = ARM::qsub_0;
853  SubRegs = 2;
854  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
855  Opc = ARM::VORRq;
856  BeginIdx = ARM::qsub_0;
857  SubRegs = 4;
858  // Fall back to VMOVD.
859  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
860  Opc = ARM::VMOVD;
861  BeginIdx = ARM::dsub_0;
862  SubRegs = 2;
863  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
864  Opc = ARM::VMOVD;
865  BeginIdx = ARM::dsub_0;
866  SubRegs = 3;
867  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
868  Opc = ARM::VMOVD;
869  BeginIdx = ARM::dsub_0;
870  SubRegs = 4;
871  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
872  Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
873  BeginIdx = ARM::gsub_0;
874  SubRegs = 2;
875  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
876  Opc = ARM::VMOVD;
877  BeginIdx = ARM::dsub_0;
878  SubRegs = 2;
879  Spacing = 2;
880  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
881  Opc = ARM::VMOVD;
882  BeginIdx = ARM::dsub_0;
883  SubRegs = 3;
884  Spacing = 2;
885  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
886  Opc = ARM::VMOVD;
887  BeginIdx = ARM::dsub_0;
888  SubRegs = 4;
889  Spacing = 2;
890  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
891  Opc = ARM::VMOVS;
892  BeginIdx = ARM::ssub_0;
893  SubRegs = 2;
894  } else if (SrcReg == ARM::CPSR) {
895  copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
896  return;
897  } else if (DestReg == ARM::CPSR) {
898  copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
899  return;
900  }
901 
902  assert(Opc && "Impossible reg-to-reg copy");
903 
904  const TargetRegisterInfo *TRI = &getRegisterInfo();
905  MachineInstrBuilder Mov;
906 
907  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
908  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
909  BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
910  Spacing = -Spacing;
911  }
912 #ifndef NDEBUG
913  SmallSet<unsigned, 4> DstRegs;
914 #endif
915  for (unsigned i = 0; i != SubRegs; ++i) {
916  unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
917  unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
918  assert(Dst && Src && "Bad sub-register");
919 #ifndef NDEBUG
920  assert(!DstRegs.count(Src) && "destructive vector copy");
921  DstRegs.insert(Dst);
922 #endif
923  Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
924  // VORR takes two source operands.
925  if (Opc == ARM::VORRq)
926  Mov.addReg(Src);
927  Mov = Mov.add(predOps(ARMCC::AL));
928  // MOVr can set CC.
929  if (Opc == ARM::MOVr)
930  Mov = Mov.add(condCodeOp());
931  }
932  // Add implicit super-register defs and kills to the last instruction.
933  Mov->addRegisterDefined(DestReg, TRI);
934  if (KillSrc)
935  Mov->addRegisterKilled(SrcReg, TRI);
936 }
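// Illustrative example of the backward-copy case above: copying a D-pair from
// d1_d2 into d2_d3 would clobber d2 before it is read if the sub-registers
// were copied in ascending order, so the loop flips Spacing and starts from
// the highest sub-register instead.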
937 
938 bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
939  const MachineOperand *&Src,
940  const MachineOperand *&Dest) const {
941  // VMOVRRD is also a copy instruction, but it requires
942  // special handling: it is a more complex form of copy,
943  // so we do not consider it here. To recognize such
944  // instructions, the isExtractSubregLike MI interface function
945  // could be used instead.
946  // VORRq is considered a move only if its two inputs are
947  // the same register.
948  if (!MI.isMoveReg() ||
949  (MI.getOpcode() == ARM::VORRq &&
950  MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
951  return false;
952  Dest = &MI.getOperand(0);
953  Src = &MI.getOperand(1);
954  return true;
955 }
956 
957 const MachineInstrBuilder &
958 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
959  unsigned SubIdx, unsigned State,
960  const TargetRegisterInfo *TRI) const {
961  if (!SubIdx)
962  return MIB.addReg(Reg, State);
963 
964  if (TargetRegisterInfo::isPhysicalRegister(Reg))
965  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
966  return MIB.addReg(Reg, State, SubIdx);
967 }
968 
969 void ARMBaseInstrInfo::
970 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
971  unsigned SrcReg, bool isKill, int FI,
972  const TargetRegisterClass *RC,
973  const TargetRegisterInfo *TRI) const {
974  DebugLoc DL;
975  if (I != MBB.end()) DL = I->getDebugLoc();
976  MachineFunction &MF = *MBB.getParent();
977  MachineFrameInfo &MFI = MF.getFrameInfo();
978  unsigned Align = MFI.getObjectAlignment(FI);
979 
980  MachineMemOperand *MMO = MF.getMachineMemOperand(
981  MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
982  MFI.getObjectSize(FI), Align);
983 
984  switch (TRI->getSpillSize(*RC)) {
985  case 2:
986  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
987  BuildMI(MBB, I, DL, get(ARM::VSTRH))
988  .addReg(SrcReg, getKillRegState(isKill))
989  .addFrameIndex(FI)
990  .addImm(0)
991  .addMemOperand(MMO)
992  .add(predOps(ARMCC::AL));
993  } else
994  llvm_unreachable("Unknown reg class!");
995  break;
996  case 4:
997  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
998  BuildMI(MBB, I, DL, get(ARM::STRi12))
999  .addReg(SrcReg, getKillRegState(isKill))
1000  .addFrameIndex(FI)
1001  .addImm(0)
1002  .addMemOperand(MMO)
1003  .add(predOps(ARMCC::AL));
1004  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1005  BuildMI(MBB, I, DL, get(ARM::VSTRS))
1006  .addReg(SrcReg, getKillRegState(isKill))
1007  .addFrameIndex(FI)
1008  .addImm(0)
1009  .addMemOperand(MMO)
1010  .add(predOps(ARMCC::AL));
1011  } else
1012  llvm_unreachable("Unknown reg class!");
1013  break;
1014  case 8:
1015  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1016  BuildMI(MBB, I, DL, get(ARM::VSTRD))
1017  .addReg(SrcReg, getKillRegState(isKill))
1018  .addFrameIndex(FI)
1019  .addImm(0)
1020  .addMemOperand(MMO)
1021  .add(predOps(ARMCC::AL));
1022  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1023  if (Subtarget.hasV5TEOps()) {
1024  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
1025  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1026  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1027  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1028  .add(predOps(ARMCC::AL));
1029  } else {
1030  // Fallback to STM instruction, which has existed since the dawn of
1031  // time.
1032  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
1033  .addFrameIndex(FI)
1034  .addMemOperand(MMO)
1035  .add(predOps(ARMCC::AL));
1036  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1037  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1038  }
1039  } else
1040  llvm_unreachable("Unknown reg class!");
1041  break;
1042  case 16:
1043  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1044  // Use aligned spills if the stack can be realigned.
1045  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1046  BuildMI(MBB, I, DL, get(ARM::VST1q64))
1047  .addFrameIndex(FI)
1048  .addImm(16)
1049  .addReg(SrcReg, getKillRegState(isKill))
1050  .addMemOperand(MMO)
1051  .add(predOps(ARMCC::AL));
1052  } else {
1053  BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
1054  .addReg(SrcReg, getKillRegState(isKill))
1055  .addFrameIndex(FI)
1056  .addMemOperand(MMO)
1057  .add(predOps(ARMCC::AL));
1058  }
1059  } else
1060  llvm_unreachable("Unknown reg class!");
1061  break;
1062  case 24:
1063  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1064  // Use aligned spills if the stack can be realigned.
1065  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1066  BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
1067  .addFrameIndex(FI)
1068  .addImm(16)
1069  .addReg(SrcReg, getKillRegState(isKill))
1070  .addMemOperand(MMO)
1071  .add(predOps(ARMCC::AL));
1072  } else {
1073  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1074  .addFrameIndex(FI)
1075  .add(predOps(ARMCC::AL))
1076  .addMemOperand(MMO);
1077  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1078  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1079  AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1080  }
1081  } else
1082  llvm_unreachable("Unknown reg class!");
1083  break;
1084  case 32:
1085  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1086  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1087  // FIXME: It's possible to only store part of the QQ register if the
1088  // spilled def has a sub-register index.
1089  BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
1090  .addFrameIndex(FI)
1091  .addImm(16)
1092  .addReg(SrcReg, getKillRegState(isKill))
1093  .addMemOperand(MMO)
1094  .add(predOps(ARMCC::AL));
1095  } else {
1096  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1097  .addFrameIndex(FI)
1098  .add(predOps(ARMCC::AL))
1099  .addMemOperand(MMO);
1100  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1101  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1102  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1103  AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1104  }
1105  } else
1106  llvm_unreachable("Unknown reg class!");
1107  break;
1108  case 64:
1109  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1110  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1111  .addFrameIndex(FI)
1112  .add(predOps(ARMCC::AL))
1113  .addMemOperand(MMO);
1114  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1115  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1116  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1117  MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1118  MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1119  MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1120  MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1121  AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1122  } else
1123  llvm_unreachable("Unknown reg class!");
1124  break;
1125  default:
1126  llvm_unreachable("Unknown reg class!");
1127  }
1128 }
1129 
1130 unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1131  int &FrameIndex) const {
1132  switch (MI.getOpcode()) {
1133  default: break;
1134  case ARM::STRrs:
1135  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1136  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1137  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1138  MI.getOperand(3).getImm() == 0) {
1139  FrameIndex = MI.getOperand(1).getIndex();
1140  return MI.getOperand(0).getReg();
1141  }
1142  break;
1143  case ARM::STRi12:
1144  case ARM::t2STRi12:
1145  case ARM::tSTRspi:
1146  case ARM::VSTRD:
1147  case ARM::VSTRS:
1148  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1149  MI.getOperand(2).getImm() == 0) {
1150  FrameIndex = MI.getOperand(1).getIndex();
1151  return MI.getOperand(0).getReg();
1152  }
1153  break;
1154  case ARM::VST1q64:
1155  case ARM::VST1d64TPseudo:
1156  case ARM::VST1d64QPseudo:
1157  if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1158  FrameIndex = MI.getOperand(0).getIndex();
1159  return MI.getOperand(2).getReg();
1160  }
1161  break;
1162  case ARM::VSTMQIA:
1163  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1164  FrameIndex = MI.getOperand(1).getIndex();
1165  return MI.getOperand(0).getReg();
1166  }
1167  break;
1168  }
1169 
1170  return 0;
1171 }
1172 
1173 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1174  int &FrameIndex) const {
1175  const MachineMemOperand *Dummy;
1176  return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
1177 }
1178 
1179 void ARMBaseInstrInfo::
1180 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1181  unsigned DestReg, int FI,
1182  const TargetRegisterClass *RC,
1183  const TargetRegisterInfo *TRI) const {
1184  DebugLoc DL;
1185  if (I != MBB.end()) DL = I->getDebugLoc();
1186  MachineFunction &MF = *MBB.getParent();
1187  MachineFrameInfo &MFI = MF.getFrameInfo();
1188  unsigned Align = MFI.getObjectAlignment(FI);
1189  MachineMemOperand *MMO = MF.getMachineMemOperand(
1190  MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1191  MFI.getObjectSize(FI), Align);
1192 
1193  switch (TRI->getSpillSize(*RC)) {
1194  case 2:
1195  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1196  BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1197  .addFrameIndex(FI)
1198  .addImm(0)
1199  .addMemOperand(MMO)
1200  .add(predOps(ARMCC::AL));
1201  } else
1202  llvm_unreachable("Unknown reg class!");
1203  break;
1204  case 4:
1205  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1206  BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1207  .addFrameIndex(FI)
1208  .addImm(0)
1209  .addMemOperand(MMO)
1210  .add(predOps(ARMCC::AL));
1211  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1212  BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1213  .addFrameIndex(FI)
1214  .addImm(0)
1215  .addMemOperand(MMO)
1216  .add(predOps(ARMCC::AL));
1217  } else
1218  llvm_unreachable("Unknown reg class!");
1219  break;
1220  case 8:
1221  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1222  BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1223  .addFrameIndex(FI)
1224  .addImm(0)
1225  .addMemOperand(MMO)
1226  .add(predOps(ARMCC::AL));
1227  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1228  MachineInstrBuilder MIB;
1229 
1230  if (Subtarget.hasV5TEOps()) {
1231  MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1232  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1233  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1234  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1235  .add(predOps(ARMCC::AL));
1236  } else {
1237  // Fallback to LDM instruction, which has existed since the dawn of
1238  // time.
1239  MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1240  .addFrameIndex(FI)
1241  .addMemOperand(MMO)
1242  .add(predOps(ARMCC::AL));
1243  MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1244  MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1245  }
1246 
1247  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1248  MIB.addReg(DestReg, RegState::ImplicitDefine);
1249  } else
1250  llvm_unreachable("Unknown reg class!");
1251  break;
1252  case 16:
1253  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1254  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1255  BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1256  .addFrameIndex(FI)
1257  .addImm(16)
1258  .addMemOperand(MMO)
1259  .add(predOps(ARMCC::AL));
1260  } else {
1261  BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1262  .addFrameIndex(FI)
1263  .addMemOperand(MMO)
1264  .add(predOps(ARMCC::AL));
1265  }
1266  } else
1267  llvm_unreachable("Unknown reg class!");
1268  break;
1269  case 24:
1270  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1271  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1272  BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1273  .addFrameIndex(FI)
1274  .addImm(16)
1275  .addMemOperand(MMO)
1276  .add(predOps(ARMCC::AL));
1277  } else {
1278  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1279  .addFrameIndex(FI)
1280  .addMemOperand(MMO)
1281  .add(predOps(ARMCC::AL));
1282  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1283  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1284  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1285  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1286  MIB.addReg(DestReg, RegState::ImplicitDefine);
1287  }
1288  } else
1289  llvm_unreachable("Unknown reg class!");
1290  break;
1291  case 32:
1292  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1293  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1294  BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1295  .addFrameIndex(FI)
1296  .addImm(16)
1297  .addMemOperand(MMO)
1298  .add(predOps(ARMCC::AL));
1299  } else {
1300  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1301  .addFrameIndex(FI)
1302  .add(predOps(ARMCC::AL))
1303  .addMemOperand(MMO);
1304  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1305  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1306  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1307  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1308  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1309  MIB.addReg(DestReg, RegState::ImplicitDefine);
1310  }
1311  } else
1312  llvm_unreachable("Unknown reg class!");
1313  break;
1314  case 64:
1315  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1316  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1317  .addFrameIndex(FI)
1318  .add(predOps(ARMCC::AL))
1319  .addMemOperand(MMO);
1320  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1321  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1322  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1323  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1324  MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1325  MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1326  MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1327  MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1328  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1329  MIB.addReg(DestReg, RegState::ImplicitDefine);
1330  } else
1331  llvm_unreachable("Unknown reg class!");
1332  break;
1333  default:
1334  llvm_unreachable("Unknown regclass!");
1335  }
1336 }
1337 
1338 unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1339  int &FrameIndex) const {
1340  switch (MI.getOpcode()) {
1341  default: break;
1342  case ARM::LDRrs:
1343  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1344  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1345  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1346  MI.getOperand(3).getImm() == 0) {
1347  FrameIndex = MI.getOperand(1).getIndex();
1348  return MI.getOperand(0).getReg();
1349  }
1350  break;
1351  case ARM::LDRi12:
1352  case ARM::t2LDRi12:
1353  case ARM::tLDRspi:
1354  case ARM::VLDRD:
1355  case ARM::VLDRS:
1356  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1357  MI.getOperand(2).getImm() == 0) {
1358  FrameIndex = MI.getOperand(1).getIndex();
1359  return MI.getOperand(0).getReg();
1360  }
1361  break;
1362  case ARM::VLD1q64:
1363  case ARM::VLD1d8TPseudo:
1364  case ARM::VLD1d16TPseudo:
1365  case ARM::VLD1d32TPseudo:
1366  case ARM::VLD1d64TPseudo:
1367  case ARM::VLD1d8QPseudo:
1368  case ARM::VLD1d16QPseudo:
1369  case ARM::VLD1d32QPseudo:
1370  case ARM::VLD1d64QPseudo:
1371  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1372  FrameIndex = MI.getOperand(1).getIndex();
1373  return MI.getOperand(0).getReg();
1374  }
1375  break;
1376  case ARM::VLDMQIA:
1377  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1378  FrameIndex = MI.getOperand(1).getIndex();
1379  return MI.getOperand(0).getReg();
1380  }
1381  break;
1382  }
1383 
1384  return 0;
1385 }
1386 
1387 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1388  int &FrameIndex) const {
1389  const MachineMemOperand *Dummy;
1390  return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
1391 }
1392 
1393 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1394 /// depending on whether the result is used.
1395 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1396  bool isThumb1 = Subtarget.isThumb1Only();
1397  bool isThumb2 = Subtarget.isThumb2();
1398  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1399 
1400  DebugLoc dl = MI->getDebugLoc();
1401  MachineBasicBlock *BB = MI->getParent();
1402 
1403  MachineInstrBuilder LDM, STM;
1404  if (isThumb1 || !MI->getOperand(1).isDead()) {
1405  MachineOperand LDWb(MI->getOperand(1));
1406  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1407  : isThumb1 ? ARM::tLDMIA_UPD
1408  : ARM::LDMIA_UPD))
1409  .add(LDWb);
1410  } else {
1411  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1412  }
1413 
1414  if (isThumb1 || !MI->getOperand(0).isDead()) {
1415  MachineOperand STWb(MI->getOperand(0));
1416  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1417  : isThumb1 ? ARM::tSTMIA_UPD
1418  : ARM::STMIA_UPD))
1419  .add(STWb);
1420  } else {
1421  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1422  }
1423 
1424  MachineOperand LDBase(MI->getOperand(3));
1425  LDM.add(LDBase).add(predOps(ARMCC::AL));
1426 
1427  MachineOperand STBase(MI->getOperand(2));
1428  STM.add(STBase).add(predOps(ARMCC::AL));
1429 
1430  // Sort the scratch registers into ascending order.
1431  const TargetRegisterInfo &TRI = getRegisterInfo();
1432  SmallVector<unsigned, 6> ScratchRegs;
1433  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1434  ScratchRegs.push_back(MI->getOperand(I).getReg());
1435  llvm::sort(ScratchRegs.begin(), ScratchRegs.end(),
1436  [&TRI](const unsigned &Reg1,
1437  const unsigned &Reg2) -> bool {
1438  return TRI.getEncodingValue(Reg1) <
1439  TRI.getEncodingValue(Reg2);
1440  });
1441 
1442  for (const auto &Reg : ScratchRegs) {
1443  LDM.addReg(Reg, RegState::Define);
1444  STM.addReg(Reg, RegState::Kill);
1445  }
1446 
1447  BB->erase(MI);
1448 }
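// Rough illustration: a MEMCPY pseudo with four scratch registers expands to
// an LDMIA(_UPD) defining the scratch registers followed by an STMIA(_UPD)
// killing them; the _UPD forms are chosen when the write-back results are
// live (and always on Thumb1), and the scratch list is sorted by encoding so
// the LDM/STM register lists stay in ascending order.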
1449 
1450 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1451  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1452  assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
1453  "LOAD_STACK_GUARD currently supported only for MachO.");
1454  expandLoadStackGuard(MI);
1455  MI.getParent()->erase(MI);
1456  return true;
1457  }
1458 
1459  if (MI.getOpcode() == ARM::MEMCPY) {
1460  expandMEMCPY(MI);
1461  return true;
1462  }
1463 
1464  // This hook gets to expand COPY instructions before they become
1465  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1466  // widened to VMOVD. We prefer the VMOVD when possible because it may be
1467  // changed into a VORR that can go down the NEON pipeline.
1468  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
1469  return false;
1470 
1471  // Look for a copy between even S-registers. That is where we keep floats
1472  // when using NEON v2f32 instructions for f32 arithmetic.
1473  unsigned DstRegS = MI.getOperand(0).getReg();
1474  unsigned SrcRegS = MI.getOperand(1).getReg();
1475  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1476  return false;
1477 
1478  const TargetRegisterInfo *TRI = &getRegisterInfo();
1479  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1480  &ARM::DPRRegClass);
1481  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1482  &ARM::DPRRegClass);
1483  if (!DstRegD || !SrcRegD)
1484  return false;
1485 
1486  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1487  // legal if the COPY already defines the full DstRegD, and it isn't a
1488  // sub-register insertion.
1489  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1490  return false;
1491 
1492  // A dead copy shouldn't show up here, but reject it just in case.
1493  if (MI.getOperand(0).isDead())
1494  return false;
1495 
1496  // All clear, widen the COPY.
1497  LLVM_DEBUG(dbgs() << "widening: " << MI);
1498  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1499 
1500  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1501  // or some other super-register.
1502  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1503  if (ImpDefIdx != -1)
1504  MI.RemoveOperand(ImpDefIdx);
1505 
1506  // Change the opcode and operands.
1507  MI.setDesc(get(ARM::VMOVD));
1508  MI.getOperand(0).setReg(DstRegD);
1509  MI.getOperand(1).setReg(SrcRegD);
1510  MIB.add(predOps(ARMCC::AL));
1511 
1512  // We are now reading SrcRegD instead of SrcRegS. This may upset the
1513  // register scavenger and machine verifier, so we need to indicate that we
1514  // are reading an undefined value from SrcRegD, but a proper value from
1515  // SrcRegS.
1516  MI.getOperand(1).setIsUndef();
1517  MIB.addReg(SrcRegS, RegState::Implicit);
1518 
1519  // SrcRegD may actually contain an unrelated value in the ssub_1
1520  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1521  if (MI.getOperand(1).isKill()) {
1522  MI.getOperand(1).setIsKill(false);
1523  MI.addRegisterKilled(SrcRegS, TRI, true);
1524  }
1525 
1526  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1527  return true;
1528 }
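// Illustrative result of the widening above: "%s0 = COPY %s2" between even
// S-registers becomes roughly "%d0 = VMOVD undef %d1, pred:AL, implicit %s2",
// i.e. the D-register source is marked undef and the original S-register is
// kept as an implicit use so only the ssub_0 half is required to be live.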
1529 
1530 /// Create a copy of a const pool value. Update CPI to the new index and return
1531 /// the label UID.
1532 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1533  MachineConstantPool *MCP = MF.getConstantPool();
1534  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1535 
1536  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1537  assert(MCPE.isMachineConstantPoolEntry() &&
1538  "Expecting a machine constantpool entry!");
1539  ARMConstantPoolValue *ACPV =
1540  static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1541 
1542  unsigned PCLabelId = AFI->createPICLabelUId();
1543  ARMConstantPoolValue *NewCPV = nullptr;
1544 
1545  // FIXME: The below assumes PIC relocation model and that the function
1546  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1547  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1548  // instructions, so that's probably OK, but is PIC always correct when
1549  // we get here?
1550  if (ACPV->isGlobalValue())
1551  NewCPV = ARMConstantPoolConstant::Create(
1552  cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1553  4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1554  else if (ACPV->isExtSymbol())
1555  NewCPV = ARMConstantPoolSymbol::
1556  Create(MF.getFunction().getContext(),
1557  cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1558  else if (ACPV->isBlockAddress())
1559  NewCPV = ARMConstantPoolConstant::
1560  Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1561  ARMCP::CPBlockAddress, 4);
1562  else if (ACPV->isLSDA())
1563  NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1564  ARMCP::CPLSDA, 4);
1565  else if (ACPV->isMachineBasicBlock())
1566  NewCPV = ARMConstantPoolMBB::
1567  Create(MF.getFunction().getContext(),
1568  cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1569  else
1570  llvm_unreachable("Unexpected ARM constantpool value type!!");
1571  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
1572  return PCLabelId;
1573 }
1574 
1575 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1576  MachineBasicBlock::iterator I,
1577  unsigned DestReg, unsigned SubIdx,
1578  const MachineInstr &Orig,
1579  const TargetRegisterInfo &TRI) const {
1580  unsigned Opcode = Orig.getOpcode();
1581  switch (Opcode) {
1582  default: {
1583  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1584  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1585  MBB.insert(I, MI);
1586  break;
1587  }
1588  case ARM::tLDRpci_pic:
1589  case ARM::t2LDRpci_pic: {
1590  MachineFunction &MF = *MBB.getParent();
1591  unsigned CPI = Orig.getOperand(1).getIndex();
1592  unsigned PCLabelId = duplicateCPV(MF, CPI);
1593  MachineInstrBuilder MIB =
1594  BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1595  .addConstantPoolIndex(CPI)
1596  .addImm(PCLabelId);
1597  MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
1598  break;
1599  }
1600  }
1601 }
1602 
1603 MachineInstr &
1605  MachineBasicBlock::iterator InsertBefore,
1606  const MachineInstr &Orig) const {
1607  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1608  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1609  for (;;) {
1610  switch (I->getOpcode()) {
1611  case ARM::tLDRpci_pic:
1612  case ARM::t2LDRpci_pic: {
1613  MachineFunction &MF = *MBB.getParent();
1614  unsigned CPI = I->getOperand(1).getIndex();
1615  unsigned PCLabelId = duplicateCPV(MF, CPI);
1616  I->getOperand(1).setIndex(CPI);
1617  I->getOperand(2).setImm(PCLabelId);
1618  break;
1619  }
1620  }
1621  if (!I->isBundledWithSucc())
1622  break;
1623  ++I;
1624  }
1625  return Cloned;
1626 }
1627 
1628 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1629  const MachineInstr &MI1,
1630  const MachineRegisterInfo *MRI) const {
1631  unsigned Opcode = MI0.getOpcode();
1632  if (Opcode == ARM::t2LDRpci ||
1633  Opcode == ARM::t2LDRpci_pic ||
1634  Opcode == ARM::tLDRpci ||
1635  Opcode == ARM::tLDRpci_pic ||
1636  Opcode == ARM::LDRLIT_ga_pcrel ||
1637  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1638  Opcode == ARM::tLDRLIT_ga_pcrel ||
1639  Opcode == ARM::MOV_ga_pcrel ||
1640  Opcode == ARM::MOV_ga_pcrel_ldr ||
1641  Opcode == ARM::t2MOV_ga_pcrel) {
1642  if (MI1.getOpcode() != Opcode)
1643  return false;
1644  if (MI0.getNumOperands() != MI1.getNumOperands())
1645  return false;
1646 
1647  const MachineOperand &MO0 = MI0.getOperand(1);
1648  const MachineOperand &MO1 = MI1.getOperand(1);
1649  if (MO0.getOffset() != MO1.getOffset())
1650  return false;
1651 
1652  if (Opcode == ARM::LDRLIT_ga_pcrel ||
1653  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1654  Opcode == ARM::tLDRLIT_ga_pcrel ||
1655  Opcode == ARM::MOV_ga_pcrel ||
1656  Opcode == ARM::MOV_ga_pcrel_ldr ||
1657  Opcode == ARM::t2MOV_ga_pcrel)
1658  // Ignore the PC labels.
1659  return MO0.getGlobal() == MO1.getGlobal();
1660 
1661  const MachineFunction *MF = MI0.getParent()->getParent();
1662  const MachineConstantPool *MCP = MF->getConstantPool();
1663  int CPI0 = MO0.getIndex();
1664  int CPI1 = MO1.getIndex();
1665  const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1666  const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1667  bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1668  bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1669  if (isARMCP0 && isARMCP1) {
1670  ARMConstantPoolValue *ACPV0 =
1671  static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1672  ARMConstantPoolValue *ACPV1 =
1673  static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1674  return ACPV0->hasSameValue(ACPV1);
1675  } else if (!isARMCP0 && !isARMCP1) {
1676  return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1677  }
1678  return false;
1679  } else if (Opcode == ARM::PICLDR) {
1680  if (MI1.getOpcode() != Opcode)
1681  return false;
1682  if (MI0.getNumOperands() != MI1.getNumOperands())
1683  return false;
1684 
1685  unsigned Addr0 = MI0.getOperand(1).getReg();
1686  unsigned Addr1 = MI1.getOperand(1).getReg();
1687  if (Addr0 != Addr1) {
1688  if (!MRI ||
1689  !TargetRegisterInfo::isVirtualRegister(Addr0) ||
1690  !TargetRegisterInfo::isVirtualRegister(Addr1))
1691  return false;
1692 
1693  // This assumes SSA form.
1694  MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1695  MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1696  // Check if the loaded value, e.g. a constantpool of a global address, are
1697  // the same.
1698  if (!produceSameValue(*Def0, *Def1, MRI))
1699  return false;
1700  }
1701 
1702  for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1703  // %12 = PICLDR %11, 0, 14, %noreg
1704  const MachineOperand &MO0 = MI0.getOperand(i);
1705  const MachineOperand &MO1 = MI1.getOperand(i);
1706  if (!MO0.isIdenticalTo(MO1))
1707  return false;
1708  }
1709  return true;
1710  }
1711 
1712  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1713 }
1714 
1715 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1716 /// determine if two loads are loading from the same base address. It should
1717 /// only return true if the base pointers are the same and the only difference
1718 /// between the two addresses is the offset. It also returns the offsets by
1719 /// reference.
1720 ///
1721 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1722 /// is permanently disabled.
1723 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1724  int64_t &Offset1,
1725  int64_t &Offset2) const {
1726  // Don't worry about Thumb: just ARM and Thumb2.
1727  if (Subtarget.isThumb1Only()) return false;
1728 
1729  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1730  return false;
1731 
1732  switch (Load1->getMachineOpcode()) {
1733  default:
1734  return false;
1735  case ARM::LDRi12:
1736  case ARM::LDRBi12:
1737  case ARM::LDRD:
1738  case ARM::LDRH:
1739  case ARM::LDRSB:
1740  case ARM::LDRSH:
1741  case ARM::VLDRD:
1742  case ARM::VLDRS:
1743  case ARM::t2LDRi8:
1744  case ARM::t2LDRBi8:
1745  case ARM::t2LDRDi8:
1746  case ARM::t2LDRSHi8:
1747  case ARM::t2LDRi12:
1748  case ARM::t2LDRBi12:
1749  case ARM::t2LDRSHi12:
1750  break;
1751  }
1752 
1753  switch (Load2->getMachineOpcode()) {
1754  default:
1755  return false;
1756  case ARM::LDRi12:
1757  case ARM::LDRBi12:
1758  case ARM::LDRD:
1759  case ARM::LDRH:
1760  case ARM::LDRSB:
1761  case ARM::LDRSH:
1762  case ARM::VLDRD:
1763  case ARM::VLDRS:
1764  case ARM::t2LDRi8:
1765  case ARM::t2LDRBi8:
1766  case ARM::t2LDRSHi8:
1767  case ARM::t2LDRi12:
1768  case ARM::t2LDRBi12:
1769  case ARM::t2LDRSHi12:
1770  break;
1771  }
1772 
1773  // Check if base addresses and chain operands match.
1774  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1775  Load1->getOperand(4) != Load2->getOperand(4))
1776  return false;
1777 
1778  // Index should be Reg0.
1779  if (Load1->getOperand(3) != Load2->getOperand(3))
1780  return false;
1781 
1782  // Determine the offsets.
1783  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1784  isa<ConstantSDNode>(Load2->getOperand(1))) {
1785  Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1786  Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1787  return true;
1788  }
1789 
1790  return false;
1791 }
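// Illustrative example: two selection-DAG loads such as "LDRi12 r1, #4" and
// "LDRi12 r1, #8" off the same chain share the base pointer, so this hook
// returns true with Offset1 = 4 and Offset2 = 8; loads whose offset operands
// are not constants are rejected.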
1792 
1793 /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
1794 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1795 /// be scheduled together. On some targets, if two loads are loading from
1796 /// addresses in the same cache line, it's better if they are scheduled
1797 /// together. This function takes two integers that represent the load offsets
1798 /// from the common base address. It returns true if it decides it's desirable
1799 /// to schedule the two loads together. "NumLoads" is the number of loads that
1800 /// have already been scheduled after Load1.
1801 ///
1802 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1803 /// is permanently disabled.
1804 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1805  int64_t Offset1, int64_t Offset2,
1806  unsigned NumLoads) const {
1807  // Don't worry about Thumb: just ARM and Thumb2.
1808  if (Subtarget.isThumb1Only()) return false;
1809 
1810  assert(Offset2 > Offset1);
1811 
1812  if ((Offset2 - Offset1) / 8 > 64)
1813  return false;
1814 
1815  // Check if the machine opcodes are different. If they are different
1816  // then we consider them to not be loads from the same base address,
1817  // EXCEPT in the case of Thumb2 byte loads where one is t2LDRBi8 and the
1818  // other t2LDRBi12. In this case, they are considered to be the same
1819  // because they are different encoding forms of the same basic instruction.
1820  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1821  !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1822  Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1823  (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1824  Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1825  return false; // FIXME: overly conservative?
1826 
1827  // Four loads in a row should be sufficient.
1828  if (NumLoads >= 3)
1829  return false;
1830 
1831  return true;
1832 }
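// Worked check (hypothetical numbers): with Offset1 = 0 and Offset2 = 8 the
// distance test (8 - 0) / 8 = 1 is not greater than 64, the opcodes match,
// and with NumLoads = 1 (fewer than 3 already scheduled) the hook returns
// true, so the scheduler keeps the two loads adjacent.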
1833 
1834 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1835  const MachineBasicBlock *MBB,
1836  const MachineFunction &MF) const {
1837  // Debug info is never a scheduling boundary. It's necessary to be explicit
1838  // due to the special treatment of IT instructions below, otherwise a
1839  // dbg_value followed by an IT will result in the IT instruction being
1840  // considered a scheduling hazard, which is wrong. It should be the actual
1841  // instruction preceding the dbg_value instruction(s), just like it is
1842  // when debug info is not present.
1843  if (MI.isDebugInstr())
1844  return false;
1845 
1846  // Terminators and labels can't be scheduled around.
1847  if (MI.isTerminator() || MI.isPosition())
1848  return true;
1849 
1850  // Treat the start of the IT block as a scheduling boundary, but schedule
1851  // t2IT along with all instructions following it.
1852  // FIXME: This is a big hammer. But the alternative is to add all potential
1853  // true and anti dependencies to IT block instructions as implicit operands
1854  // to the t2IT instruction. The added compile time and complexity does not
1855  // seem worth it.
1856  MachineBasicBlock::const_iterator I = MI;
1857  // Make sure to skip any debug instructions
1858  while (++I != MBB->end() && I->isDebugInstr())
1859  ;
1860  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1861  return true;
1862 
1863  // Don't attempt to schedule around any instruction that defines
1864  // a stack-oriented pointer, as it's unlikely to be profitable. This
1865  // saves compile time, because it doesn't require every single
1866  // stack slot reference to depend on the instruction that does the
1867  // modification.
1868  // Calls don't actually change the stack pointer, even if they have imp-defs.
1869  // No ARM calling conventions change the stack pointer. (X86 calling
1870  // conventions sometimes do).
1871  if (!MI.isCall() && MI.definesRegister(ARM::SP))
1872  return true;
1873 
1874  return false;
1875 }
1876 
1877 bool ARMBaseInstrInfo::
1878 isProfitableToIfCvt(MachineBasicBlock &MBB,
1879  unsigned NumCycles, unsigned ExtraPredCycles,
1880  BranchProbability Probability) const {
1881  if (!NumCycles)
1882  return false;
1883 
1884  // If we are optimizing for size, see if the branch in the predecessor can be
1885  // lowered to cbn?z by the constant island lowering pass, and return false if
1886  // so. This results in a shorter instruction sequence.
1887  if (MBB.getParent()->getFunction().optForSize()) {
1888  MachineBasicBlock *Pred = *MBB.pred_begin();
1889  if (!Pred->empty()) {
1890  MachineInstr *LastMI = &*Pred->rbegin();
1891  if (LastMI->getOpcode() == ARM::t2Bcc) {
1892  MachineBasicBlock::iterator CmpMI = LastMI;
1893  if (CmpMI != Pred->begin()) {
1894  --CmpMI;
1895  if (CmpMI->getOpcode() == ARM::tCMPi8 ||
1896  CmpMI->getOpcode() == ARM::t2CMPri) {
1897  unsigned Reg = CmpMI->getOperand(0).getReg();
1898  unsigned PredReg = 0;
1899  ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
1900  if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
1901  isARMLowRegister(Reg))
1902  return false;
1903  }
1904  }
1905  }
1906  }
1907  }
1908  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1909  MBB, 0, 0, Probability);
1910 }
1911 
1912 bool ARMBaseInstrInfo::
1913 isProfitableToIfCvt(MachineBasicBlock &TBB,
1914  unsigned TCycles, unsigned TExtra,
1915  MachineBasicBlock &FBB,
1916  unsigned FCycles, unsigned FExtra,
1917  BranchProbability Probability) const {
1918  if (!TCycles)
1919  return false;
1920 
1921  // Attempt to estimate the relative costs of predication versus branching.
1922  // Here we scale up each component of UnpredCost to avoid precision issues when
1923  // scaling TCycles/FCycles by Probability.
1924  const unsigned ScalingUpFactor = 1024;
1925 
1926  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1927  unsigned UnpredCost;
1928  if (!Subtarget.hasBranchPredictor()) {
1929  // When we don't have a branch predictor it's always cheaper to not take a
1930  // branch than take it, so we have to take that into account.
1931  unsigned NotTakenBranchCost = 1;
1932  unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1933  unsigned TUnpredCycles, FUnpredCycles;
1934  if (!FCycles) {
1935  // Triangle: TBB is the fallthrough
1936  TUnpredCycles = TCycles + NotTakenBranchCost;
1937  FUnpredCycles = TakenBranchCost;
1938  } else {
1939  // Diamond: TBB is the block that is branched to, FBB is the fallthrough
1940  TUnpredCycles = TCycles + TakenBranchCost;
1941  FUnpredCycles = FCycles + NotTakenBranchCost;
1942  // The branch at the end of FBB will disappear when it's predicated, so
1943  // discount it from PredCost.
1944  PredCost -= 1 * ScalingUpFactor;
1945  }
1946  // The total cost is the cost of each path scaled by their probabilities.
1947  unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
1948  unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
1949  UnpredCost = TUnpredCost + FUnpredCost;
1950  // When predicating assume that the first IT can be folded away but later
1951  // ones cost one cycle each
1952  if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
1953  PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
1954  }
1955  } else {
1956  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
1957  unsigned FUnpredCost =
1958  Probability.getCompl().scale(FCycles * ScalingUpFactor);
1959  UnpredCost = TUnpredCost + FUnpredCost;
1960  UnpredCost += 1 * ScalingUpFactor; // The branch itself
1961  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
1962  }
1963 
1964  return PredCost <= UnpredCost;
1965 }
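// Worked example of the cost model above (hypothetical numbers): a diamond
// with TCycles = FCycles = 2, no extra predication cycles, a 50% branch
// probability, no branch predictor and a misprediction penalty of 8 gives
//   PredCost    = (2 + 2) * 1024 - 1024 = 3072   (FBB's branch folds away)
//   TUnpredCost = ((2 + 8) * 1024) / 2  = 5120
//   FUnpredCost = ((2 + 1) * 1024) / 2  = 1536
//   UnpredCost  = 5120 + 1536           = 6656
// so PredCost <= UnpredCost and if-conversion is reported as profitable.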
1966 
1967 bool
1968 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1969  MachineBasicBlock &FMBB) const {
1970  // Reduce false anti-dependencies to let the target's out-of-order execution
1971  // engine do its thing.
1972  return Subtarget.isProfitableToUnpredicate();
1973 }
1974 
1975 /// getInstrPredicate - If instruction is predicated, returns its predicate
1976 /// condition, otherwise returns AL. It also returns the condition code
1977 /// register by reference.
1978 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
1979  unsigned &PredReg) {
1980  int PIdx = MI.findFirstPredOperandIdx();
1981  if (PIdx == -1) {
1982  PredReg = 0;
1983  return ARMCC::AL;
1984  }
1985 
1986  PredReg = MI.getOperand(PIdx+1).getReg();
1987  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
1988 }
1989 
1990 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
1991  if (Opc == ARM::B)
1992  return ARM::Bcc;
1993  if (Opc == ARM::tB)
1994  return ARM::tBcc;
1995  if (Opc == ARM::t2B)
1996  return ARM::t2Bcc;
1997 
1998  llvm_unreachable("Unknown unconditional branch opcode!");
1999 }
2000 
2001 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2002  bool NewMI,
2003  unsigned OpIdx1,
2004  unsigned OpIdx2) const {
2005  switch (MI.getOpcode()) {
2006  case ARM::MOVCCr:
2007  case ARM::t2MOVCCr: {
2008  // MOVCC can be commuted by inverting the condition.
2009  unsigned PredReg = 0;
2010  ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2011  // MOVCC AL can't be inverted. Shouldn't happen.
2012  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2013  return nullptr;
2014  MachineInstr *CommutedMI =
2015  TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2016  if (!CommutedMI)
2017  return nullptr;
2018  // After swapping the MOVCC operands, also invert the condition.
2019  CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2020  .setImm(ARMCC::getOppositeCondition(CC));
2021  return CommutedMI;
2022  }
2023  }
2024  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2025 }
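// Illustration (virtual register names are hypothetical): commuting a MOVCC
// swaps its two source operands, so the condition must be inverted to keep
// the same semantics, e.g.
//   %d = MOVCCr %a, %b, ARMCC::EQ, %cpsr  ==>  %d = MOVCCr %b, %a, ARMCC::NE, %cpsr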
2026 
2027 /// Identify instructions that can be folded into a MOVCC instruction, and
2028 /// return the defining instruction.
2029 static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
2030  const MachineRegisterInfo &MRI,
2031  const TargetInstrInfo *TII) {
2032  if (!TargetRegisterInfo::isVirtualRegister(Reg))
2033  return nullptr;
2034  if (!MRI.hasOneNonDBGUse(Reg))
2035  return nullptr;
2036  MachineInstr *MI = MRI.getVRegDef(Reg);
2037  if (!MI)
2038  return nullptr;
2039  // MI is folded into the MOVCC by predicating it.
2040  if (!MI->isPredicable())
2041  return nullptr;
2042  // Check if MI has any non-dead defs or physreg uses. This also detects
2043  // predicated instructions which will be reading CPSR.
2044  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
2045  const MachineOperand &MO = MI->getOperand(i);
2046  // Reject frame index operands, PEI can't handle the predicated pseudos.
2047  if (MO.isFI() || MO.isCPI() || MO.isJTI())
2048  return nullptr;
2049  if (!MO.isReg())
2050  continue;
2051  // MI can't have any tied operands, that would conflict with predication.
2052  if (MO.isTied())
2053  return nullptr;
2054  if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
2055  return nullptr;
2056  if (MO.isDef() && !MO.isDead())
2057  return nullptr;
2058  }
2059  bool DontMoveAcrossStores = true;
2060  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2061  return nullptr;
2062  return MI;
2063 }
2064 
2065 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2066  SmallVectorImpl<MachineOperand> &Cond,
2067  unsigned &TrueOp, unsigned &FalseOp,
2068  bool &Optimizable) const {
2069  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2070  "Unknown select instruction");
2071  // MOVCC operands:
2072  // 0: Def.
2073  // 1: True use.
2074  // 2: False use.
2075  // 3: Condition code.
2076  // 4: CPSR use.
2077  TrueOp = 1;
2078  FalseOp = 2;
2079  Cond.push_back(MI.getOperand(3));
2080  Cond.push_back(MI.getOperand(4));
2081  // We can always fold a def.
2082  Optimizable = true;
2083  return false;
2084 }
2085 
2086 MachineInstr *
2087 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2088  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2089  bool PreferFalse) const {
2090  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2091  "Unknown select instruction");
2092  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2093  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2094  bool Invert = !DefMI;
2095  if (!DefMI)
2096  DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2097  if (!DefMI)
2098  return nullptr;
2099 
2100  // Find new register class to use.
2101  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2102  unsigned DestReg = MI.getOperand(0).getReg();
2103  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
2104  if (!MRI.constrainRegClass(DestReg, PreviousClass))
2105  return nullptr;
2106 
2107  // Create a new predicated version of DefMI.
2108  // Rfalse is the first use.
2109  MachineInstrBuilder NewMI =
2110  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2111 
2112  // Copy all the DefMI operands, excluding its (null) predicate.
2113  const MCInstrDesc &DefDesc = DefMI->getDesc();
2114  for (unsigned i = 1, e = DefDesc.getNumOperands();
2115  i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2116  NewMI.add(DefMI->getOperand(i));
2117 
2118  unsigned CondCode = MI.getOperand(3).getImm();
2119  if (Invert)
2120  NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2121  else
2122  NewMI.addImm(CondCode);
2123  NewMI.add(MI.getOperand(4));
2124 
2125  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2126  if (NewMI->hasOptionalDef())
2127  NewMI.add(condCodeOp());
2128 
2129  // The output register value when the predicate is false is an implicit
2130  // register operand tied to the first def.
2131  // The tie makes the register allocator ensure the FalseReg is allocated the
2132  // same register as operand 0.
2133  FalseReg.setImplicit();
2134  NewMI.add(FalseReg);
2135  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2136 
2137  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2138  SeenMIs.insert(NewMI);
2139  SeenMIs.erase(DefMI);
2140 
2141  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2142  // DefMI would be invalid when transferred inside the loop. Checking for a
2143  // loop is expensive, but at least remove kill flags if they are in different
2144  // BBs.
2145  if (DefMI->getParent() != MI.getParent())
2146  NewMI->clearKillInfo();
2147 
2148  // The caller will erase MI, but not DefMI.
2149  DefMI->eraseFromParent();
2150  return NewMI;
2151 }
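// Rough sketch of the rewrite above (hypothetical vregs, simplified operand
// lists): a foldable definition feeding operand 2 of the select, e.g.
//   %t = t2ADDri %a, 1, 14, %noreg, %noreg
//   %d = t2MOVCCr %f, %t, ARMCC::GT, %cpsr
// becomes a single predicated instruction whose result defaults to %f via the
// tied implicit operand:
//   %d = t2ADDri %a, 1, ARMCC::GT, %cpsr, %noreg, implicit %f (tied to %d)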
2152 
2153 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2154 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2155 /// def operand.
2156 ///
2157 /// This will go away once we can teach tblgen how to set the optional CPSR def
2158 /// operand itself.
2159 struct AddSubFlagsOpcodePair {
2160  uint16_t PseudoOpc;
2161  uint16_t MachineOpc;
2162 };
2163 
2164 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2165  {ARM::ADDSri, ARM::ADDri},
2166  {ARM::ADDSrr, ARM::ADDrr},
2167  {ARM::ADDSrsi, ARM::ADDrsi},
2168  {ARM::ADDSrsr, ARM::ADDrsr},
2169 
2170  {ARM::SUBSri, ARM::SUBri},
2171  {ARM::SUBSrr, ARM::SUBrr},
2172  {ARM::SUBSrsi, ARM::SUBrsi},
2173  {ARM::SUBSrsr, ARM::SUBrsr},
2174 
2175  {ARM::RSBSri, ARM::RSBri},
2176  {ARM::RSBSrsi, ARM::RSBrsi},
2177  {ARM::RSBSrsr, ARM::RSBrsr},
2178 
2179  {ARM::tADDSi3, ARM::tADDi3},
2180  {ARM::tADDSi8, ARM::tADDi8},
2181  {ARM::tADDSrr, ARM::tADDrr},
2182  {ARM::tADCS, ARM::tADC},
2183 
2184  {ARM::tSUBSi3, ARM::tSUBi3},
2185  {ARM::tSUBSi8, ARM::tSUBi8},
2186  {ARM::tSUBSrr, ARM::tSUBrr},
2187  {ARM::tSBCS, ARM::tSBC},
2188 
2189  {ARM::t2ADDSri, ARM::t2ADDri},
2190  {ARM::t2ADDSrr, ARM::t2ADDrr},
2191  {ARM::t2ADDSrs, ARM::t2ADDrs},
2192 
2193  {ARM::t2SUBSri, ARM::t2SUBri},
2194  {ARM::t2SUBSrr, ARM::t2SUBrr},
2195  {ARM::t2SUBSrs, ARM::t2SUBrs},
2196 
2197  {ARM::t2RSBSri, ARM::t2RSBri},
2198  {ARM::t2RSBSrs, ARM::t2RSBrs},
2199 };
2200 
2201 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2202  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2203  if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2204  return AddSubFlagsOpcodeMap[i].MachineOpc;
2205  return 0;
2206 }
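// Usage note (illustrative): convertAddSubFlagsOpcode(ARM::t2SUBSri) yields
// ARM::t2SUBri once the CPSR def has been made explicit, and the function
// returns 0 for any opcode that is not in the table.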
2207 
2208 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2209  MachineBasicBlock::iterator &MBBI,
2210  const DebugLoc &dl, unsigned DestReg,
2211  unsigned BaseReg, int NumBytes,
2212  ARMCC::CondCodes Pred, unsigned PredReg,
2213  const ARMBaseInstrInfo &TII,
2214  unsigned MIFlags) {
2215  if (NumBytes == 0 && DestReg != BaseReg) {
2216  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2217  .addReg(BaseReg, RegState::Kill)
2218  .add(predOps(Pred, PredReg))
2219  .add(condCodeOp())
2220  .setMIFlags(MIFlags);
2221  return;
2222  }
2223 
2224  bool isSub = NumBytes < 0;
2225  if (isSub) NumBytes = -NumBytes;
2226 
2227  while (NumBytes) {
2228  unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2229  unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2230  assert(ThisVal && "Didn't extract field correctly");
2231 
2232  // We will handle these bits from offset, clear them.
2233  NumBytes &= ~ThisVal;
2234 
2235  assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2236 
2237  // Build the new ADD / SUB.
2238  unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2239  BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2240  .addReg(BaseReg, RegState::Kill)
2241  .addImm(ThisVal)
2242  .add(predOps(Pred, PredReg))
2243  .add(condCodeOp())
2244  .setMIFlags(MIFlags);
2245  BaseReg = DestReg;
2246  }
2247 }
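// Illustration (hypothetical value): an offset that is not a single ARM
// rotated immediate is peeled into several ADDri/SUBri instructions, one
// 8-bit rotated chunk per loop iteration. For example NumBytes = 0x1004 is
// materialised as two adds, using the chunks 0x4 and 0x1000 in whatever
// order getSOImmValRotate extracts them, each carrying Pred/PredReg.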
2248 
2249 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2250  MachineFunction &MF, MachineInstr *MI,
2251  unsigned NumBytes) {
2252  // This optimisation potentially adds lots of load and store
2253  // micro-operations, so it's really only a benefit to code size.
2254  if (!MF.getFunction().optForMinSize())
2255  return false;
2256 
2257  // If only one register is pushed/popped, LLVM can use an LDR/STR
2258  // instead. We can't modify those so make sure we're dealing with an
2259  // instruction we understand.
2260  bool IsPop = isPopOpcode(MI->getOpcode());
2261  bool IsPush = isPushOpcode(MI->getOpcode());
2262  if (!IsPush && !IsPop)
2263  return false;
2264 
2265  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2266  MI->getOpcode() == ARM::VLDMDIA_UPD;
2267  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2268  MI->getOpcode() == ARM::tPOP ||
2269  MI->getOpcode() == ARM::tPOP_RET;
2270 
2271  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2272  MI->getOperand(1).getReg() == ARM::SP)) &&
2273  "trying to fold sp update into non-sp-updating push/pop");
2274 
2275  // The VFP push & pop act on D-registers, so we can only correctly fold in
2276  // an adjustment that is a multiple of 8 bytes. Similarly, rN is 4 bytes.
2277  // Don't try if this is violated.
2278  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2279  return false;
2280 
2281  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2282  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2283  int RegListIdx = IsT1PushPop ? 2 : 4;
2284 
2285  // Calculate the space we'll need in terms of registers.
2286  unsigned RegsNeeded;
2287  const TargetRegisterClass *RegClass;
2288  if (IsVFPPushPop) {
2289  RegsNeeded = NumBytes / 8;
2290  RegClass = &ARM::DPRRegClass;
2291  } else {
2292  RegsNeeded = NumBytes / 4;
2293  RegClass = &ARM::GPRRegClass;
2294  }
2295 
2296  // We're going to have to strip all list operands off before
2297  // re-adding them since the order matters, so save the existing ones
2298  // for later.
2299  SmallVector<MachineOperand, 4> RegList;
2300 
2301  // We're also going to need the first register transferred by this
2302  // instruction, which won't necessarily be the first register in the list.
2303  unsigned FirstRegEnc = -1;
2304 
2305  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2306  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2307  MachineOperand &MO = MI->getOperand(i);
2308  RegList.push_back(MO);
2309 
2310  if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2311  FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2312  }
2313 
2314  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2315 
2316  // Now try to find enough space in the reglist to allocate NumBytes.
2317  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2318  --CurRegEnc) {
2319  unsigned CurReg = RegClass->getRegister(CurRegEnc);
2320  if (!IsPop) {
2321  // Pushing any register is completely harmless, mark the register involved
2322  // as undef since we don't care about its value and must not restore it
2323  // during stack unwinding.
2324  RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2325  false, false, true));
2326  --RegsNeeded;
2327  continue;
2328  }
2329 
2330  // However, we can only pop an extra register if it's not live. For
2331  // registers live within the function we might clobber a return value
2332  // register; the other way a register can be live here is if it's
2333  // callee-saved.
2334  if (isCalleeSavedRegister(CurReg, CSRegs) ||
2335  MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2336  MachineBasicBlock::LQR_Dead) {
2337  // VFP pops don't allow holes in the register list, so any skip is fatal
2338  // for our transformation. GPR pops do, so we should just keep looking.
2339  if (IsVFPPushPop)
2340  return false;
2341  else
2342  continue;
2343  }
2344 
2345  // Mark the unimportant registers as <def,dead> in the POP.
2346  RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2347  true));
2348  --RegsNeeded;
2349  }
2350 
2351  if (RegsNeeded > 0)
2352  return false;
2353 
2354  // Finally we know we can profitably perform the optimisation so go
2355  // ahead: strip all existing registers off and add them back again
2356  // in the right order.
2357  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2358  MI->RemoveOperand(i);
2359 
2360  // Add the complete list back in.
2361  MachineInstrBuilder MIB(MF, &*MI);
2362  for (int i = RegList.size() - 1; i >= 0; --i)
2363  MIB.add(RegList[i]);
2364 
2365  return true;
2366 }
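// Sketch of the folding above (registers are illustrative, operand spelling
// abridged): at minimum size, an 8-byte SP adjustment next to
//   tPUSH 14, %noreg, %r4, %r5
// can be absorbed by widening the register list with two scratch registers
// pushed as undef, e.g.
//   tPUSH 14, %noreg, %r2, %r3, %r4, %r5
// so the separate "sub sp, sp, #8" instruction disappears.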
2367 
2368 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2369  unsigned FrameReg, int &Offset,
2370  const ARMBaseInstrInfo &TII) {
2371  unsigned Opcode = MI.getOpcode();
2372  const MCInstrDesc &Desc = MI.getDesc();
2373  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2374  bool isSub = false;
2375 
2376  // Memory operands in inline assembly always use AddrMode2.
2377  if (Opcode == ARM::INLINEASM)
2378  AddrMode = ARMII::AddrMode2;
2379 
2380  if (Opcode == ARM::ADDri) {
2381  Offset += MI.getOperand(FrameRegIdx+1).getImm();
2382  if (Offset == 0) {
2383  // Turn it into a move.
2384  MI.setDesc(TII.get(ARM::MOVr));
2385  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2386  MI.RemoveOperand(FrameRegIdx+1);
2387  Offset = 0;
2388  return true;
2389  } else if (Offset < 0) {
2390  Offset = -Offset;
2391  isSub = true;
2392  MI.setDesc(TII.get(ARM::SUBri));
2393  }
2394 
2395  // Common case: small offset, fits into instruction.
2396  if (ARM_AM::getSOImmVal(Offset) != -1) {
2397  // Replace the FrameIndex with sp / fp
2398  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2399  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2400  Offset = 0;
2401  return true;
2402  }
2403 
2404  // Otherwise, pull as much of the immediate into this ADDri/SUBri
2405  // as possible.
2406  unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2407  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2408 
2409  // We will handle these bits from offset, clear them.
2410  Offset &= ~ThisImmVal;
2411 
2412  // Get the properly encoded SOImmVal field.
2413  assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2414  "Bit extraction didn't work?");
2415  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2416  } else {
2417  unsigned ImmIdx = 0;
2418  int InstrOffs = 0;
2419  unsigned NumBits = 0;
2420  unsigned Scale = 1;
2421  switch (AddrMode) {
2422  case ARMII::AddrMode_i12:
2423  ImmIdx = FrameRegIdx + 1;
2424  InstrOffs = MI.getOperand(ImmIdx).getImm();
2425  NumBits = 12;
2426  break;
2427  case ARMII::AddrMode2:
2428  ImmIdx = FrameRegIdx+2;
2429  InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2430  if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2431  InstrOffs *= -1;
2432  NumBits = 12;
2433  break;
2434  case ARMII::AddrMode3:
2435  ImmIdx = FrameRegIdx+2;
2436  InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2437  if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2438  InstrOffs *= -1;
2439  NumBits = 8;
2440  break;
2441  case ARMII::AddrMode4:
2442  case ARMII::AddrMode6:
2443  // Can't fold any offset even if it's zero.
2444  return false;
2445  case ARMII::AddrMode5:
2446  ImmIdx = FrameRegIdx+1;
2447  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2448  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2449  InstrOffs *= -1;
2450  NumBits = 8;
2451  Scale = 4;
2452  break;
2453  case ARMII::AddrMode5FP16:
2454  ImmIdx = FrameRegIdx+1;
2455  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2456  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2457  InstrOffs *= -1;
2458  NumBits = 8;
2459  Scale = 2;
2460  break;
2461  default:
2462  llvm_unreachable("Unsupported addressing mode!");
2463  }
2464 
2465  Offset += InstrOffs * Scale;
2466  assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2467  if (Offset < 0) {
2468  Offset = -Offset;
2469  isSub = true;
2470  }
2471 
2472  // Attempt to fold address comp. if opcode has offset bits
2473  if (NumBits > 0) {
2474  // Common case: small offset, fits into instruction.
2475  MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2476  int ImmedOffset = Offset / Scale;
2477  unsigned Mask = (1 << NumBits) - 1;
2478  if ((unsigned)Offset <= Mask * Scale) {
2479  // Replace the FrameIndex with sp
2480  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2481  // FIXME: When addrmode2 goes away, this will simplify (like the
2482  // T2 version), as the LDR.i12 versions don't need the encoding
2483  // tricks for the offset value.
2484  if (isSub) {
2485  if (AddrMode == ARMII::AddrMode_i12)
2486  ImmedOffset = -ImmedOffset;
2487  else
2488  ImmedOffset |= 1 << NumBits;
2489  }
2490  ImmOp.ChangeToImmediate(ImmedOffset);
2491  Offset = 0;
2492  return true;
2493  }
2494 
2495  // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2496  ImmedOffset = ImmedOffset & Mask;
2497  if (isSub) {
2498  if (AddrMode == ARMII::AddrMode_i12)
2499  ImmedOffset = -ImmedOffset;
2500  else
2501  ImmedOffset |= 1 << NumBits;
2502  }
2503  ImmOp.ChangeToImmediate(ImmedOffset);
2504  Offset &= ~(Mask*Scale);
2505  }
2506  }
2507 
2508  Offset = (isSub) ? -Offset : Offset;
2509  return Offset == 0;
2510 }
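// Worked example (hypothetical offsets): for an LDRi12 (AddrMode_i12, 12
// offset bits, Scale = 1) a combined frame offset of 100 fits in the
// immediate, so the frame register is substituted and the function returns
// true with Offset == 0. A combined offset of 5000 exceeds the 4095-byte
// range: 905 is folded into the immediate and the function returns false,
// leaving Offset == 4096 for the caller to materialise separately.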
2511 
2512 /// analyzeCompare - For a comparison instruction, return the source registers
2513 /// in SrcReg and SrcReg2 if having two register operands, and the value it
2514 /// compares against in CmpValue. Return true if the comparison instruction
2515 /// can be analyzed.
2516 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
2517  unsigned &SrcReg2, int &CmpMask,
2518  int &CmpValue) const {
2519  switch (MI.getOpcode()) {
2520  default: break;
2521  case ARM::CMPri:
2522  case ARM::t2CMPri:
2523  case ARM::tCMPi8:
2524  SrcReg = MI.getOperand(0).getReg();
2525  SrcReg2 = 0;
2526  CmpMask = ~0;
2527  CmpValue = MI.getOperand(1).getImm();
2528  return true;
2529  case ARM::CMPrr:
2530  case ARM::t2CMPrr:
2531  SrcReg = MI.getOperand(0).getReg();
2532  SrcReg2 = MI.getOperand(1).getReg();
2533  CmpMask = ~0;
2534  CmpValue = 0;
2535  return true;
2536  case ARM::TSTri:
2537  case ARM::t2TSTri:
2538  SrcReg = MI.getOperand(0).getReg();
2539  SrcReg2 = 0;
2540  CmpMask = MI.getOperand(1).getImm();
2541  CmpValue = 0;
2542  return true;
2543  }
2544 
2545  return false;
2546 }
2547 
2548 /// isSuitableForMask - Identify a suitable 'and' instruction that
2549 /// operates on the given source register and applies the same mask
2550 /// as a 'tst' instruction. Provide a limited look-through for copies.
2551 /// When successful, MI will hold the found instruction.
2552 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2553  int CmpMask, bool CommonUse) {
2554  switch (MI->getOpcode()) {
2555  case ARM::ANDri:
2556  case ARM::t2ANDri:
2557  if (CmpMask != MI->getOperand(2).getImm())
2558  return false;
2559  if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2560  return true;
2561  break;
2562  }
2563 
2564  return false;
2565 }
2566 
2567 /// getSwappedCondition - assume the flags are set by MI(a,b), return
2568 /// the condition code if we modify the instructions such that flags are
2569 /// set by MI(b,a).
2571  switch (CC) {
2572  default: return ARMCC::AL;
2573  case ARMCC::EQ: return ARMCC::EQ;
2574  case ARMCC::NE: return ARMCC::NE;
2575  case ARMCC::HS: return ARMCC::LS;
2576  case ARMCC::LO: return ARMCC::HI;
2577  case ARMCC::HI: return ARMCC::LO;
2578  case ARMCC::LS: return ARMCC::HS;
2579  case ARMCC::GE: return ARMCC::LE;
2580  case ARMCC::LT: return ARMCC::GT;
2581  case ARMCC::GT: return ARMCC::LT;
2582  case ARMCC::LE: return ARMCC::GE;
2583  }
2584 }
2585 
2586 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2587 /// the condition code if we modify the instructions such that flags are
2588 /// set by ADD(a,b,X).
2590  switch (CC) {
2591  default: return ARMCC::AL;
2592  case ARMCC::HS: return ARMCC::LO;
2593  case ARMCC::LO: return ARMCC::HS;
2594  case ARMCC::VS: return ARMCC::VS;
2595  case ARMCC::VC: return ARMCC::VC;
2596  }
2597 }
2598 
2599 /// isRedundantFlagInstr - check whether the first instruction, whose only
2600 /// purpose is to update flags, can be made redundant.
2601 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2602 /// CMPri can be made redundant by SUBri if the operands are the same.
2603 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2604 /// This function can be extended later on.
2605 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2606  unsigned SrcReg, unsigned SrcReg2,
2607  int ImmValue, const MachineInstr *OI) {
2608  if ((CmpI->getOpcode() == ARM::CMPrr ||
2609  CmpI->getOpcode() == ARM::t2CMPrr) &&
2610  (OI->getOpcode() == ARM::SUBrr ||
2611  OI->getOpcode() == ARM::t2SUBrr) &&
2612  ((OI->getOperand(1).getReg() == SrcReg &&
2613  OI->getOperand(2).getReg() == SrcReg2) ||
2614  (OI->getOperand(1).getReg() == SrcReg2 &&
2615  OI->getOperand(2).getReg() == SrcReg)))
2616  return true;
2617 
2618  if ((CmpI->getOpcode() == ARM::CMPri ||
2619  CmpI->getOpcode() == ARM::t2CMPri) &&
2620  (OI->getOpcode() == ARM::SUBri ||
2621  OI->getOpcode() == ARM::t2SUBri) &&
2622  OI->getOperand(1).getReg() == SrcReg &&
2623  OI->getOperand(2).getImm() == ImmValue)
2624  return true;
2625 
2626  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2627  (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2628  OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2629  OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2630  OI->getOperand(0).getReg() == SrcReg &&
2631  OI->getOperand(1).getReg() == SrcReg2)
2632  return true;
2633  return false;
2634 }
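// Illustration (hypothetical vregs): given
//   %d = SUBrr %a, %b      (or SUBrr %b, %a)
//   CMPrr %a, %b
// the compare is a removal candidate because the SUB can be switched to its
// flag-setting form and produce the same flags; the swapped-operand case is
// only valid if the users' condition codes are rewritten, see
// getSwappedCondition above.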
2635 
2636 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2637  switch (MI->getOpcode()) {
2638  default: return false;
2639  case ARM::tLSLri:
2640  case ARM::tLSRri:
2641  case ARM::tLSLrr:
2642  case ARM::tLSRrr:
2643  case ARM::tSUBrr:
2644  case ARM::tADDrr:
2645  case ARM::tADDi3:
2646  case ARM::tADDi8:
2647  case ARM::tSUBi3:
2648  case ARM::tSUBi8:
2649  case ARM::tMUL:
2650  IsThumb1 = true;
2651  LLVM_FALLTHROUGH;
2652  case ARM::RSBrr:
2653  case ARM::RSBri:
2654  case ARM::RSCrr:
2655  case ARM::RSCri:
2656  case ARM::ADDrr:
2657  case ARM::ADDri:
2658  case ARM::ADCrr:
2659  case ARM::ADCri:
2660  case ARM::SUBrr:
2661  case ARM::SUBri:
2662  case ARM::SBCrr:
2663  case ARM::SBCri:
2664  case ARM::t2RSBri:
2665  case ARM::t2ADDrr:
2666  case ARM::t2ADDri:
2667  case ARM::t2ADCrr:
2668  case ARM::t2ADCri:
2669  case ARM::t2SUBrr:
2670  case ARM::t2SUBri:
2671  case ARM::t2SBCrr:
2672  case ARM::t2SBCri:
2673  case ARM::ANDrr:
2674  case ARM::ANDri:
2675  case ARM::t2ANDrr:
2676  case ARM::t2ANDri:
2677  case ARM::ORRrr:
2678  case ARM::ORRri:
2679  case ARM::t2ORRrr:
2680  case ARM::t2ORRri:
2681  case ARM::EORrr:
2682  case ARM::EORri:
2683  case ARM::t2EORrr:
2684  case ARM::t2EORri:
2685  case ARM::t2LSRri:
2686  case ARM::t2LSRrr:
2687  case ARM::t2LSLri:
2688  case ARM::t2LSLrr:
2689  return true;
2690  }
2691 }
2692 
2693 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2694 /// comparison into one that sets the zero bit in the flags register;
2695 /// Remove a redundant Compare instruction if an earlier instruction can set the
2696 /// flags in the same way as Compare.
2697 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2698 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2699 /// condition code of instructions which use the flags.
2701  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
2702  int CmpValue, const MachineRegisterInfo *MRI) const {
2703  // Get the unique definition of SrcReg.
2704  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2705  if (!MI) return false;
2706 
2707  // Masked compares sometimes use the same register as the corresponding 'and'.
2708  if (CmpMask != ~0) {
2709  if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2710  MI = nullptr;
2711  for (MachineRegisterInfo::use_instr_iterator
2712  UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2713  UI != UE; ++UI) {
2714  if (UI->getParent() != CmpInstr.getParent())
2715  continue;
2716  MachineInstr *PotentialAND = &*UI;
2717  if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2718  isPredicated(*PotentialAND))
2719  continue;
2720  MI = PotentialAND;
2721  break;
2722  }
2723  if (!MI) return false;
2724  }
2725  }
2726 
2727  // Get ready to iterate backward from CmpInstr.
2728  MachineBasicBlock::iterator I = CmpInstr, E = MI,
2729  B = CmpInstr.getParent()->begin();
2730 
2731  // Early exit if CmpInstr is at the beginning of the BB.
2732  if (I == B) return false;
2733 
2734  // There are two possible candidates which can be changed to set CPSR:
2735  // One is MI, the other is a SUB or ADD instruction.
2736  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2737  // ADDr[ri](r1, r2, X).
2738  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2739  MachineInstr *SubAdd = nullptr;
2740  if (SrcReg2 != 0)
2741  // MI is not a candidate for CMPrr.
2742  MI = nullptr;
2743  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2744  // Conservatively refuse to convert an instruction which isn't in the same
2745  // BB as the comparison.
2746  // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2747  // Thus we cannot return here.
2748  if (CmpInstr.getOpcode() == ARM::CMPri ||
2749  CmpInstr.getOpcode() == ARM::t2CMPri)
2750  MI = nullptr;
2751  else
2752  return false;
2753  }
2754 
2755  bool IsThumb1 = false;
2756  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2757  return false;
2758 
2759  // We also want to do this peephole for cases like this: if (a*b == 0),
2760  // and optimise away the CMP instruction from the generated code sequence:
2761  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2762  // resulting from the select instruction, but these MOVS instructions for
2763  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2764  // However, if we only have MOVS instructions in between the CMP and the
2765  // other instruction (the MULS in this example), then the CPSR is dead so we
2766  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2767  // reordering and then continue the analysis hoping we can eliminate the
2768  // CMP. This peephole works on the vregs, so is still in SSA form. As a
2769  // consequence, the movs won't redefine/kill the MUL operands which would
2770  // make this reordering illegal.
2771  if (MI && IsThumb1) {
2772  --I;
2773  bool CanReorder = true;
2774  const bool HasStmts = I != E;
2775  for (; I != E; --I) {
2776  if (I->getOpcode() != ARM::tMOVi8) {
2777  CanReorder = false;
2778  break;
2779  }
2780  }
2781  if (HasStmts && CanReorder) {
2782  MI = MI->removeFromParent();
2783  E = CmpInstr;
2784  CmpInstr.getParent()->insert(E, MI);
2785  }
2786  I = CmpInstr;
2787  E = MI;
2788  }
2789 
2790  // Check that CPSR isn't set between the comparison instruction and the one we
2791  // want to change. At the same time, search for SubAdd.
2792  const TargetRegisterInfo *TRI = &getRegisterInfo();
2793  do {
2794  const MachineInstr &Instr = *--I;
2795 
2796  // Check whether CmpInstr can be made redundant by the current instruction.
2797  if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
2798  SubAdd = &*I;
2799  break;
2800  }
2801 
2802  // Allow E (which was initially MI) to be SubAdd but do not search before E.
2803  if (I == E)
2804  break;
2805 
2806  if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2807  Instr.readsRegister(ARM::CPSR, TRI))
2808  // This instruction modifies or uses CPSR after the one we want to
2809  // change. We can't do this transformation.
2810  return false;
2811 
2812  } while (I != B);
2813 
2814  // Return false if no candidates exist.
2815  if (!MI && !SubAdd)
2816  return false;
2817 
2818  // The single candidate is called MI.
2819  if (!MI) MI = SubAdd;
2820 
2821  // We can't use a predicated instruction - it doesn't always write the flags.
2822  if (isPredicated(*MI))
2823  return false;
2824 
2825  // Scan forward for the use of CPSR
2826  // When checking against MI: if it's a conditional code that requires
2827  // checking of the V bit or C bit, then this is not safe to do.
2828  // It is safe to remove CmpInstr if CPSR is redefined or killed.
2829  // If we are done with the basic block, we need to check whether CPSR is
2830  // live-out.
2831  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
2832  OperandsToUpdate;
2833  bool isSafe = false;
2834  I = CmpInstr;
2835  E = CmpInstr.getParent()->end();
2836  while (!isSafe && ++I != E) {
2837  const MachineInstr &Instr = *I;
2838  for (unsigned IO = 0, EO = Instr.getNumOperands();
2839  !isSafe && IO != EO; ++IO) {
2840  const MachineOperand &MO = Instr.getOperand(IO);
2841  if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2842  isSafe = true;
2843  break;
2844  }
2845  if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2846  continue;
2847  if (MO.isDef()) {
2848  isSafe = true;
2849  break;
2850  }
2851  // Condition code is after the operand before CPSR except for VSELs.
2852  ARMCC::CondCodes CC;
2853  bool IsInstrVSel = true;
2854  switch (Instr.getOpcode()) {
2855  default:
2856  IsInstrVSel = false;
2857  CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
2858  break;
2859  case ARM::VSELEQD:
2860  case ARM::VSELEQS:
2861  CC = ARMCC::EQ;
2862  break;
2863  case ARM::VSELGTD:
2864  case ARM::VSELGTS:
2865  CC = ARMCC::GT;
2866  break;
2867  case ARM::VSELGED:
2868  case ARM::VSELGES:
2869  CC = ARMCC::GE;
2870  break;
2871  case ARM::VSELVSS:
2872  case ARM::VSELVSD:
2873  CC = ARMCC::VS;
2874  break;
2875  }
2876 
2877  if (SubAdd) {
2878  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2879  // on CMP needs to be updated to be based on SUB.
2880  // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
2881  // needs to be modified.
2882  // Push the condition code operands to OperandsToUpdate.
2883  // If it is safe to remove CmpInstr, the condition code of these
2884  // operands will be modified.
2885  unsigned Opc = SubAdd->getOpcode();
2886  bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
2887  Opc == ARM::SUBri || Opc == ARM::t2SUBri;
2888  if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
2889  SubAdd->getOperand(2).getReg() == SrcReg)) {
2890  // VSel doesn't support condition code update.
2891  if (IsInstrVSel)
2892  return false;
2893  // Ensure we can swap the condition.
2894  ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
2895  if (NewCC == ARMCC::AL)
2896  return false;
2897  OperandsToUpdate.push_back(
2898  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
2899  }
2900  } else {
2901  // No SubAdd, so this is x = <op> y, z; cmp x, 0.
2902  switch (CC) {
2903  case ARMCC::EQ: // Z
2904  case ARMCC::NE: // Z
2905  case ARMCC::MI: // N
2906  case ARMCC::PL: // N
2907  case ARMCC::AL: // none
2908  // CPSR can be used multiple times, we should continue.
2909  break;
2910  case ARMCC::HS: // C
2911  case ARMCC::LO: // C
2912  case ARMCC::VS: // V
2913  case ARMCC::VC: // V
2914  case ARMCC::HI: // C Z
2915  case ARMCC::LS: // C Z
2916  case ARMCC::GE: // N V
2917  case ARMCC::LT: // N V
2918  case ARMCC::GT: // Z N V
2919  case ARMCC::LE: // Z N V
2920  // The instruction uses the V bit or C bit which is not safe.
2921  return false;
2922  }
2923  }
2924  }
2925  }
2926 
2927  // If CPSR is not killed nor re-defined, we should check whether it is
2928  // live-out. If it is live-out, do not optimize.
2929  if (!isSafe) {
2930  MachineBasicBlock *MBB = CmpInstr.getParent();
2931  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2932  SE = MBB->succ_end(); SI != SE; ++SI)
2933  if ((*SI)->isLiveIn(ARM::CPSR))
2934  return false;
2935  }
2936 
2937  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
2938  // set CPSR so this is represented as an explicit output)
2939  if (!IsThumb1) {
2940  MI->getOperand(5).setReg(ARM::CPSR);
2941  MI->getOperand(5).setIsDef(true);
2942  }
2943  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
2944  CmpInstr.eraseFromParent();
2945 
2946  // Modify the condition code of operands in OperandsToUpdate.
2947  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2948  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2949  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
2950  OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
2951 
2952  return true;
2953 }
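// End-to-end sketch of the peephole (hypothetical vregs, abridged operand
// spelling):
//   %d = SUBrr %a, %b, 14, %noreg, %noreg
//   CMPrr %a, %b, 14, %noreg, implicit-def %cpsr
//   Bcc %bb.1, 1 /* NE */, %cpsr
// becomes, after toggling the optional CPSR def on the SUB and erasing the
// compare:
//   %d = SUBrr %a, %b, 14, %noreg, def %cpsr
//   Bcc %bb.1, 1 /* NE */, %cpsr
// Had the compare been CMPrr %b, %a, the branch condition would additionally
// be rewritten via getSwappedCondition.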
2954 
2955 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
2956  // Do not sink MI if it might be used to optimize a redundant compare.
2957  // We heuristically only look at the instruction immediately following MI to
2958  // avoid potentially searching the entire basic block.
2959  if (isPredicated(MI))
2960  return true;
2961  MachineBasicBlock::const_iterator Next = &MI;
2962  ++Next;
2963  unsigned SrcReg, SrcReg2;
2964  int CmpMask, CmpValue;
2965  if (Next != MI.getParent()->end() &&
2966  analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
2967  isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
2968  return false;
2969  return true;
2970 }
2971 
2972 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2973  unsigned Reg,
2974  MachineRegisterInfo *MRI) const {
2975  // Fold large immediates into add, sub, or, xor.
2976  unsigned DefOpc = DefMI.getOpcode();
2977  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
2978  return false;
2979  if (!DefMI.getOperand(1).isImm())
2980  // Could be t2MOVi32imm @xx
2981  return false;
2982 
2983  if (!MRI->hasOneNonDBGUse(Reg))
2984  return false;
2985 
2986  const MCInstrDesc &DefMCID = DefMI.getDesc();
2987  if (DefMCID.hasOptionalDef()) {
2988  unsigned NumOps = DefMCID.getNumOperands();
2989  const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
2990  if (MO.getReg() == ARM::CPSR && !MO.isDead())
2991  // If DefMI defines CPSR and it is not dead, it's obviously not safe
2992  // to delete DefMI.
2993  return false;
2994  }
2995 
2996  const MCInstrDesc &UseMCID = UseMI.getDesc();
2997  if (UseMCID.hasOptionalDef()) {
2998  unsigned NumOps = UseMCID.getNumOperands();
2999  if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3000  // If the instruction sets the flag, do not attempt this optimization
3001  // since it may change the semantics of the code.
3002  return false;
3003  }
3004 
3005  unsigned UseOpc = UseMI.getOpcode();
3006  unsigned NewUseOpc = 0;
3007  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3008  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3009  bool Commute = false;
3010  switch (UseOpc) {
3011  default: return false;
3012  case ARM::SUBrr:
3013  case ARM::ADDrr:
3014  case ARM::ORRrr:
3015  case ARM::EORrr:
3016  case ARM::t2SUBrr:
3017  case ARM::t2ADDrr:
3018  case ARM::t2ORRrr:
3019  case ARM::t2EORrr: {
3020  Commute = UseMI.getOperand(2).getReg() != Reg;
3021  switch (UseOpc) {
3022  default: break;
3023  case ARM::ADDrr:
3024  case ARM::SUBrr:
3025  if (UseOpc == ARM::SUBrr && Commute)
3026  return false;
3027 
3028  // ADD/SUB are special because they're essentially the same operation, so
3029  // we can handle a larger range of immediates.
3030  if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3031  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3032  else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3033  ImmVal = -ImmVal;
3034  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3035  } else
3036  return false;
3037  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3038  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3039  break;
3040  case ARM::ORRrr:
3041  case ARM::EORrr:
3042  if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3043  return false;
3044  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3045  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3046  switch (UseOpc) {
3047  default: break;
3048  case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3049  case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3050  }
3051  break;
3052  case ARM::t2ADDrr:
3053  case ARM::t2SUBrr:
3054  if (UseOpc == ARM::t2SUBrr && Commute)
3055  return false;
3056 
3057  // ADD/SUB are special because they're essentially the same operation, so
3058  // we can handle a larger range of immediates.
3059  if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3060  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
3061  else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3062  ImmVal = -ImmVal;
3063  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
3064  } else
3065  return false;
3066  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3067  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3068  break;
3069  case ARM::t2ORRrr:
3070  case ARM::t2EORrr:
3071  if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3072  return false;
3073  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3074  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3075  switch (UseOpc) {
3076  default: break;
3077  case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3078  case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3079  }
3080  break;
3081  }
3082  }
3083  }
3084 
3085  unsigned OpIdx = Commute ? 2 : 1;
3086  unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
3087  bool isKill = UseMI.getOperand(OpIdx).isKill();
3088  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
3089  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3090  NewReg)
3091  .addReg(Reg1, getKillRegState(isKill))
3092  .addImm(SOImmValV1)
3093  .add(predOps(ARMCC::AL))
3094  .add(condCodeOp());
3095  UseMI.setDesc(get(NewUseOpc));
3096  UseMI.getOperand(1).setReg(NewReg);
3097  UseMI.getOperand(1).setIsKill();
3098  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3099  DefMI.eraseFromParent();
3100  return true;
3101 }
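// Sketch (hypothetical vregs and value): a two-part immediate such as 0xFF01
// lets
//   %c = MOVi32imm 0xFF01
//   %d = ADDrr %a, %c
// be rewritten as two immediate adds, splitting 0xFF01 into its rotated
// parts 0x01 and 0xFF00 (one on a freshly created ADDri, the other folded
// into the rewritten use), after which the MOVi32imm is erased.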
3102 
3103 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3104  const MachineInstr &MI) {
3105  switch (MI.getOpcode()) {
3106  default: {
3107  const MCInstrDesc &Desc = MI.getDesc();
3108  int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3109  assert(UOps >= 0 && "bad # UOps");
3110  return UOps;
3111  }
3112 
3113  case ARM::LDRrs:
3114  case ARM::LDRBrs:
3115  case ARM::STRrs:
3116  case ARM::STRBrs: {
3117  unsigned ShOpVal = MI.getOperand(3).getImm();
3118  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3119  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3120  if (!isSub &&
3121  (ShImm == 0 ||
3122  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3123  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3124  return 1;
3125  return 2;
3126  }
3127 
3128  case ARM::LDRH:
3129  case ARM::STRH: {
3130  if (!MI.getOperand(2).getReg())
3131  return 1;
3132 
3133  unsigned ShOpVal = MI.getOperand(3).getImm();
3134  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3135  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3136  if (!isSub &&
3137  (ShImm == 0 ||
3138  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3139  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3140  return 1;
3141  return 2;
3142  }
3143 
3144  case ARM::LDRSB:
3145  case ARM::LDRSH:
3146  return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3147 
3148  case ARM::LDRSB_POST:
3149  case ARM::LDRSH_POST: {
3150  unsigned Rt = MI.getOperand(0).getReg();
3151  unsigned Rm = MI.getOperand(3).getReg();
3152  return (Rt == Rm) ? 4 : 3;
3153  }
3154 
3155  case ARM::LDR_PRE_REG:
3156  case ARM::LDRB_PRE_REG: {
3157  unsigned Rt = MI.getOperand(0).getReg();
3158  unsigned Rm = MI.getOperand(3).getReg();
3159  if (Rt == Rm)
3160  return 3;
3161  unsigned ShOpVal = MI.getOperand(4).getImm();
3162  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3163  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3164  if (!isSub &&
3165  (ShImm == 0 ||
3166  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3167  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3168  return 2;
3169  return 3;
3170  }
3171 
3172  case ARM::STR_PRE_REG:
3173  case ARM::STRB_PRE_REG: {
3174  unsigned ShOpVal = MI.getOperand(4).getImm();
3175  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3176  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3177  if (!isSub &&
3178  (ShImm == 0 ||
3179  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3180  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3181  return 2;
3182  return 3;
3183  }
3184 
3185  case ARM::LDRH_PRE:
3186  case ARM::STRH_PRE: {
3187  unsigned Rt = MI.getOperand(0).getReg();
3188  unsigned Rm = MI.getOperand(3).getReg();
3189  if (!Rm)
3190  return 2;
3191  if (Rt == Rm)
3192  return 3;
3193  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3194  }
3195 
3196  case ARM::LDR_POST_REG:
3197  case ARM::LDRB_POST_REG:
3198  case ARM::LDRH_POST: {
3199  unsigned Rt = MI.getOperand(0).getReg();
3200  unsigned Rm = MI.getOperand(3).getReg();
3201  return (Rt == Rm) ? 3 : 2;
3202  }
3203 
3204  case ARM::LDR_PRE_IMM:
3205  case ARM::LDRB_PRE_IMM:
3206  case ARM::LDR_POST_IMM:
3207  case ARM::LDRB_POST_IMM:
3208  case ARM::STRB_POST_IMM:
3209  case ARM::STRB_POST_REG:
3210  case ARM::STRB_PRE_IMM:
3211  case ARM::STRH_POST:
3212  case ARM::STR_POST_IMM:
3213  case ARM::STR_POST_REG:
3214  case ARM::STR_PRE_IMM:
3215  return 2;
3216 
3217  case ARM::LDRSB_PRE:
3218  case ARM::LDRSH_PRE: {
3219  unsigned Rm = MI.getOperand(3).getReg();
3220  if (Rm == 0)
3221  return 3;
3222  unsigned Rt = MI.getOperand(0).getReg();
3223  if (Rt == Rm)
3224  return 4;
3225  unsigned ShOpVal = MI.getOperand(4).getImm();
3226  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3227  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3228  if (!isSub &&
3229  (ShImm == 0 ||
3230  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3231  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3232  return 3;
3233  return 4;
3234  }
3235 
3236  case ARM::LDRD: {
3237  unsigned Rt = MI.getOperand(0).getReg();
3238  unsigned Rn = MI.getOperand(2).getReg();
3239  unsigned Rm = MI.getOperand(3).getReg();
3240  if (Rm)
3241  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3242  : 3;
3243  return (Rt == Rn) ? 3 : 2;
3244  }
3245 
3246  case ARM::STRD: {
3247  unsigned Rm = MI.getOperand(3).getReg();
3248  if (Rm)
3249  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3250  : 3;
3251  return 2;
3252  }
3253 
3254  case ARM::LDRD_POST:
3255  case ARM::t2LDRD_POST:
3256  return 3;
3257 
3258  case ARM::STRD_POST:
3259  case ARM::t2STRD_POST:
3260  return 4;
3261 
3262  case ARM::LDRD_PRE: {
3263  unsigned Rt = MI.getOperand(0).getReg();
3264  unsigned Rn = MI.getOperand(3).getReg();
3265  unsigned Rm = MI.getOperand(4).getReg();
3266  if (Rm)
3267  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3268  : 4;
3269  return (Rt == Rn) ? 4 : 3;
3270  }
3271 
3272  case ARM::t2LDRD_PRE: {
3273  unsigned Rt = MI.getOperand(0).getReg();
3274  unsigned Rn = MI.getOperand(3).getReg();
3275  return (Rt == Rn) ? 4 : 3;
3276  }
3277 
3278  case ARM::STRD_PRE: {
3279  unsigned Rm = MI.getOperand(4).getReg();
3280  if (Rm)
3281  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3282  : 4;
3283  return 3;
3284  }
3285 
3286  case ARM::t2STRD_PRE:
3287  return 3;
3288 
3289  case ARM::t2LDR_POST:
3290  case ARM::t2LDRB_POST:
3291  case ARM::t2LDRB_PRE:
3292  case ARM::t2LDRSBi12:
3293  case ARM::t2LDRSBi8:
3294  case ARM::t2LDRSBpci:
3295  case ARM::t2LDRSBs:
3296  case ARM::t2LDRH_POST:
3297  case ARM::t2LDRH_PRE:
3298  case ARM::t2LDRSBT:
3299  case ARM::t2LDRSB_POST:
3300  case ARM::t2LDRSB_PRE:
3301  case ARM::t2LDRSH_POST:
3302  case ARM::t2LDRSH_PRE:
3303  case ARM::t2LDRSHi12:
3304  case ARM::t2LDRSHi8:
3305  case ARM::t2LDRSHpci:
3306  case ARM::t2LDRSHs:
3307  return 2;
3308 
3309  case ARM::t2LDRDi8: {
3310  unsigned Rt = MI.getOperand(0).getReg();
3311  unsigned Rn = MI.getOperand(2).getReg();
3312  return (Rt == Rn) ? 3 : 2;
3313  }
3314 
3315  case ARM::t2STRB_POST:
3316  case ARM::t2STRB_PRE:
3317  case ARM::t2STRBs:
3318  case ARM::t2STRDi8:
3319  case ARM::t2STRH_POST:
3320  case ARM::t2STRH_PRE:
3321  case ARM::t2STRHs:
3322  case ARM::t2STR_POST:
3323  case ARM::t2STR_PRE:
3324  case ARM::t2STRs:
3325  return 2;
3326  }
3327 }
3328 
3329 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3330 // can't be easily determined return 0 (missing MachineMemOperand).
3331 //
3332 // FIXME: The current MachineInstr design does not support relying on machine
3333 // mem operands to determine the width of a memory access. Instead, we expect
3334 // the target to provide this information based on the instruction opcode and
3335 // operands. However, using MachineMemOperand is the best solution now for
3336 // two reasons:
3337 //
3338 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3339 // operands. This is much more dangerous than using the MachineMemOperand
3340 // sizes because CodeGen passes can insert/remove optional machine operands. In
3341 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3342 // postRA passes as well.
3343 //
3344 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3345 // machine model that calls this should handle the unknown (zero size) case.
3346 //
3347 // Long term, we should require a target hook that verifies MachineMemOperand
3348 // sizes during MC lowering. That target hook should be local to MC lowering
3349 // because we can't ensure that it is aware of other MI forms. Doing this will
3350 // ensure that MachineMemOperands are correctly propagated through all passes.
3351 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3352  unsigned Size = 0;
3353  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3354  E = MI.memoperands_end();
3355  I != E; ++I) {
3356  Size += (*I)->getSize();
3357  }
3358  return Size / 4;
3359 }
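// Example (illustrative): an LDMIA carrying a single 16-byte
// MachineMemOperand is reported as transferring 16 / 4 = 4 words; with no
// memory operands attached, the function returns 0 and the scheduling model
// must treat the access width as unknown.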
3360 
3361 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3362  unsigned NumRegs) {
3363  unsigned UOps = 1 + NumRegs; // 1 for address computation.
3364  switch (Opc) {
3365  default:
3366  break;
3367  case ARM::VLDMDIA_UPD:
3368  case ARM::VLDMDDB_UPD:
3369  case ARM::VLDMSIA_UPD:
3370  case ARM::VLDMSDB_UPD:
3371  case ARM::VSTMDIA_UPD:
3372  case ARM::VSTMDDB_UPD:
3373  case ARM::VSTMSIA_UPD:
3374  case ARM::VSTMSDB_UPD:
3375  case ARM::LDMIA_UPD:
3376  case ARM::LDMDA_UPD:
3377  case ARM::LDMDB_UPD:
3378  case ARM::LDMIB_UPD:
3379  case ARM::STMIA_UPD:
3380  case ARM::STMDA_UPD:
3381  case ARM::STMDB_UPD:
3382  case ARM::STMIB_UPD:
3383  case ARM::tLDMIA_UPD:
3384  case ARM::tSTMIA_UPD:
3385  case ARM::t2LDMIA_UPD:
3386  case ARM::t2LDMDB_UPD:
3387  case ARM::t2STMIA_UPD:
3388  case ARM::t2STMDB_UPD:
3389  ++UOps; // One for base register writeback.
3390  break;
3391  case ARM::LDMIA_RET:
3392  case ARM::tPOP_RET:
3393  case ARM::t2LDMIA_RET:
3394  UOps += 2; // One for base reg wb, one for write to pc.
3395  break;
3396  }
3397  return UOps;
3398 }
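// Example (illustrative): an LDMIA_UPD loading three registers is modelled as
// 1 (address) + 3 (registers) + 1 (base writeback) = 5 micro-ops, while a
// tPOP_RET of the same size is 1 + 3 + 2 = 6 because it also writes the pc.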
3399 
3400 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3401  const MachineInstr &MI) const {
3402  if (!ItinData || ItinData->isEmpty())
3403  return 1;
3404 
3405  const MCInstrDesc &Desc = MI.getDesc();
3406  unsigned Class = Desc.getSchedClass();
3407  int ItinUOps = ItinData->getNumMicroOps(Class);
3408  if (ItinUOps >= 0) {
3409  if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3410  return getNumMicroOpsSwiftLdSt(ItinData, MI);
3411 
3412  return ItinUOps;
3413  }
3414 
3415  unsigned Opc = MI.getOpcode();
3416  switch (Opc) {
3417  default:
3418  llvm_unreachable("Unexpected multi-uops instruction!");
3419  case ARM::VLDMQIA:
3420  case ARM::VSTMQIA:
3421  return 2;
3422 
3423  // The number of uOps for load / store multiple is determined by the number
3424  // of registers.
3425  //
3426  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3427  // same cycle. The scheduling for the first load / store must be done
3428  // separately by assuming the address is not 64-bit aligned.
3429  //
3430  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3431  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3432  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
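  // As a worked example, a five-register VLDMDIA on Cortex-A9 costs
  // (5 / 2) + (5 % 2) + 1 = 4 uops under this formula.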
3433  case ARM::VLDMDIA:
3434  case ARM::VLDMDIA_UPD:
3435  case ARM::VLDMDDB_UPD:
3436  case ARM::VLDMSIA:
3437  case ARM::VLDMSIA_UPD:
3438  case ARM::VLDMSDB_UPD:
3439  case ARM::VSTMDIA:
3440  case ARM::VSTMDIA_UPD:
3441  case ARM::VSTMDDB_UPD:
3442  case ARM::VSTMSIA:
3443  case ARM::VSTMSIA_UPD:
3444  case ARM::VSTMSDB_UPD: {
3445  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3446  return (NumRegs / 2) + (NumRegs % 2) + 1;
3447  }
3448 
3449  case ARM::LDMIA_RET:
3450  case ARM::LDMIA:
3451  case ARM::LDMDA:
3452  case ARM::LDMDB:
3453  case ARM::LDMIB:
3454  case ARM::LDMIA_UPD:
3455  case ARM::LDMDA_UPD:
3456  case ARM::LDMDB_UPD:
3457  case ARM::LDMIB_UPD:
3458  case ARM::STMIA:
3459  case ARM::STMDA:
3460  case ARM::STMDB:
3461  case ARM::STMIB:
3462  case ARM::STMIA_UPD:
3463  case ARM::STMDA_UPD:
3464  case ARM::STMDB_UPD:
3465  case ARM::STMIB_UPD:
3466  case ARM::tLDMIA:
3467  case ARM::tLDMIA_UPD:
3468  case ARM::tSTMIA_UPD:
3469  case ARM::tPOP_RET:
3470  case ARM::tPOP:
3471  case ARM::tPUSH:
3472  case ARM::t2LDMIA_RET:
3473  case ARM::t2LDMIA:
3474  case ARM::t2LDMDB:
3475  case ARM::t2LDMIA_UPD:
3476  case ARM::t2LDMDB_UPD:
3477  case ARM::t2STMIA:
3478  case ARM::t2STMDB:
3479  case ARM::t2STMIA_UPD:
3480  case ARM::t2STMDB_UPD: {
3481  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3482  switch (Subtarget.getLdStMultipleTiming()) {
3483  case ARMSubtarget::SingleIssuePlusExtras:
3484  return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3485  case ARMSubtarget::SingleIssue:
3486  // Assume the worst.
3487  return NumRegs;
3488  case ARMSubtarget::DoubleIssue: {
3489  if (NumRegs < 4)
3490  return 2;
3491  // 4 registers would be issued: 2, 2.
3492  // 5 registers would be issued: 2, 2, 1.
3493  unsigned UOps = (NumRegs / 2);
3494  if (NumRegs % 2)
3495  ++UOps;
3496  return UOps;
3497  }
3498  case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3499  unsigned UOps = (NumRegs / 2);
3500  // If there is an odd number of registers or if it's not 64-bit aligned,
3501  // then it takes an extra AGU (Address Generation Unit) cycle.
3502  if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3503  (*MI.memoperands_begin())->getAlignment() < 8)
3504  ++UOps;
3505  return UOps;
3506  }
3507  }
3508  }
3509  }
3510  llvm_unreachable("Didn't find the number of microops");
3511 }
3512 
3513 int
3514 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3515  const MCInstrDesc &DefMCID,
3516  unsigned DefClass,
3517  unsigned DefIdx, unsigned DefAlign) const {
3518  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3519  if (RegNo <= 0)
3520  // Def is the address writeback.
3521  return ItinData->getOperandCycle(DefClass, DefIdx);
3522 
3523  int DefCycle;
3524  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3525  // (regno / 2) + (regno % 2) + 1
3526  DefCycle = RegNo / 2 + 1;
3527  if (RegNo % 2)
3528  ++DefCycle;
3529  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3530  DefCycle = RegNo;
3531  bool isSLoad = false;
3532 
3533  switch (DefMCID.getOpcode()) {
3534  default: break;
3535  case ARM::VLDMSIA:
3536  case ARM::VLDMSIA_UPD:
3537  case ARM::VLDMSDB_UPD:
3538  isSLoad = true;
3539  break;
3540  }
3541 
3542  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3543  // then it takes an extra cycle.
3544  if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3545  ++DefCycle;
3546  } else {
3547  // Assume the worst.
3548  DefCycle = RegNo + 2;
3549  }
3550 
3551  return DefCycle;
3552 }
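// For instance, on Cortex-A8 the third D-register loaded by a VLDM (RegNo = 3)
// gets DefCycle = 3 / 2 + 1 = 2, plus 1 for the odd register count, so its
// result is available in cycle 3.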
3553 
3554 bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
3555  unsigned BaseReg = MI.getOperand(0).getReg();
3556  for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
3557  const auto &Op = MI.getOperand(i);
3558  if (Op.isReg() && Op.getReg() == BaseReg)
3559  return true;
3560  }
3561  return false;
3562 }
3563 unsigned
3564 ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
3565  // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops
3566  // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops)
3567  return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
3568 }
3569 
3570 int
3571 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3572  const MCInstrDesc &DefMCID,
3573  unsigned DefClass,
3574  unsigned DefIdx, unsigned DefAlign) const {
3575  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3576  if (RegNo <= 0)
3577  // Def is the address writeback.
3578  return ItinData->getOperandCycle(DefClass, DefIdx);
3579 
3580  int DefCycle;
3581  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3582  // 4 registers would be issued: 1, 2, 1.
3583  // 5 registers would be issued: 1, 2, 2.
3584  DefCycle = RegNo / 2;
3585  if (DefCycle < 1)
3586  DefCycle = 1;
3587  // Result latency is issue cycle + 2: E2.
3588  DefCycle += 2;
3589  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3590  DefCycle = (RegNo / 2);
3591  // If there is an odd number of registers or if it's not 64-bit aligned,
3592  // then it takes an extra AGU (Address Generation Unit) cycle.
3593  if ((RegNo % 2) || DefAlign < 8)
3594  ++DefCycle;
3595  // Result latency is AGU cycles + 2.
3596  DefCycle += 2;
3597  } else {
3598  // Assume the worst.
3599  DefCycle = RegNo + 2;
3600  }
3601 
3602  return DefCycle;
3603 }
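// For instance, on Cortex-A8 the fifth register of an LDM (RegNo = 5) gets
// DefCycle = 5 / 2 = 2 issue cycles plus 2 for the E2 result stage, i.e. 4,
// while the unknown-subtarget fallback would report 5 + 2 = 7.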
3604 
3605 int
3606 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3607  const MCInstrDesc &UseMCID,
3608  unsigned UseClass,
3609  unsigned UseIdx, unsigned UseAlign) const {
3610  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3611  if (RegNo <= 0)
3612  return ItinData->getOperandCycle(UseClass, UseIdx);
3613 
3614  int UseCycle;
3615  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3616  // (regno / 2) + (regno % 2) + 1
3617  UseCycle = RegNo / 2 + 1;
3618  if (RegNo % 2)
3619  ++UseCycle;
3620  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3621  UseCycle = RegNo;
3622  bool isSStore = false;
3623 
3624  switch (UseMCID.getOpcode()) {
3625  default: break;
3626  case ARM::VSTMSIA:
3627  case ARM::VSTMSIA_UPD:
3628  case ARM::VSTMSDB_UPD:
3629  isSStore = true;
3630  break;
3631  }
3632 
3633  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3634  // then it takes an extra cycle.
3635  if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3636  ++UseCycle;
3637  } else {
3638  // Assume the worst.
3639  UseCycle = RegNo + 2;
3640  }
3641 
3642  return UseCycle;
3643 }
3644 
3645 int
3646 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3647  const MCInstrDesc &UseMCID,
3648  unsigned UseClass,
3649  unsigned UseIdx, unsigned UseAlign) const {
3650  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3651  if (RegNo <= 0)
3652  return ItinData->getOperandCycle(UseClass, UseIdx);
3653 
3654  int UseCycle;
3655  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3656  UseCycle = RegNo / 2;
3657  if (UseCycle < 2)
3658  UseCycle = 2;
3659  // Read in E3.
3660  UseCycle += 2;
3661  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3662  UseCycle = (RegNo / 2);
3663  // If there is an odd number of registers or if it's not 64-bit aligned,
3664  // then it takes an extra AGU (Address Generation Unit) cycle.
3665  if ((RegNo % 2) || UseAlign < 8)
3666  ++UseCycle;
3667  } else {
3668  // Assume the worst.
3669  UseCycle = 1;
3670  }
3671  return UseCycle;
3672 }
3673 
3674 int
3675 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3676  const MCInstrDesc &DefMCID,
3677  unsigned DefIdx, unsigned DefAlign,
3678  const MCInstrDesc &UseMCID,
3679  unsigned UseIdx, unsigned UseAlign) const {
3680  unsigned DefClass = DefMCID.getSchedClass();
3681  unsigned UseClass = UseMCID.getSchedClass();
3682 
3683  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3684  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3685 
3686  // This may be a def / use of a variable_ops instruction, the operand
3687  // latency might be determinable dynamically. Let the target try to
3688  // figure it out.
3689  int DefCycle = -1;
3690  bool LdmBypass = false;
3691  switch (DefMCID.getOpcode()) {
3692  default:
3693  DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3694  break;
3695 
3696  case ARM::VLDMDIA:
3697  case ARM::VLDMDIA_UPD:
3698  case ARM::VLDMDDB_UPD:
3699  case ARM::VLDMSIA:
3700  case ARM::VLDMSIA_UPD:
3701  case ARM::VLDMSDB_UPD:
3702  DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3703  break;
3704 
3705  case ARM::LDMIA_RET:
3706  case ARM::LDMIA:
3707  case ARM::LDMDA:
3708  case ARM::LDMDB:
3709  case ARM::LDMIB:
3710  case ARM::LDMIA_UPD:
3711  case ARM::LDMDA_UPD:
3712  case ARM::LDMDB_UPD:
3713  case ARM::LDMIB_UPD:
3714  case ARM::tLDMIA:
3715  case ARM::tLDMIA_UPD:
3716  case ARM::tPUSH:
3717  case ARM::t2LDMIA_RET:
3718  case ARM::t2LDMIA:
3719  case ARM::t2LDMDB:
3720  case ARM::t2LDMIA_UPD:
3721  case ARM::t2LDMDB_UPD:
3722  LdmBypass = true;
3723  DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3724  break;
3725  }
3726 
3727  if (DefCycle == -1)
3728  // We can't seem to determine the result latency of the def, assume it's 2.
3729  DefCycle = 2;
3730 
3731  int UseCycle = -1;
3732  switch (UseMCID.getOpcode()) {
3733  default:
3734  UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3735  break;
3736 
3737  case ARM::VSTMDIA:
3738  case ARM::VSTMDIA_UPD:
3739  case ARM::VSTMDDB_UPD:
3740  case ARM::VSTMSIA:
3741  case ARM::VSTMSIA_UPD:
3742  case ARM::VSTMSDB_UPD:
3743  UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3744  break;
3745 
3746  case ARM::STMIA:
3747  case ARM::STMDA:
3748  case ARM::STMDB:
3749  case ARM::STMIB:
3750  case ARM::STMIA_UPD:
3751  case ARM::STMDA_UPD:
3752  case ARM::STMDB_UPD:
3753  case ARM::STMIB_UPD:
3754  case ARM::tSTMIA_UPD:
3755  case ARM::tPOP_RET:
3756  case ARM::tPOP:
3757  case ARM::t2STMIA:
3758  case ARM::t2STMDB:
3759  case ARM::t2STMIA_UPD:
3760  case ARM::t2STMDB_UPD:
3761  UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3762  break;
3763  }
3764 
3765  if (UseCycle == -1)
3766  // Assume it's read in the first stage.
3767  UseCycle = 1;
3768 
3769  UseCycle = DefCycle - UseCycle + 1;
3770  if (UseCycle > 0) {
3771  if (LdmBypass) {
3772  // It's a variable_ops instruction so we can't use DefIdx here. Just use
3773  // first def operand.
3774  if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3775  UseClass, UseIdx))
3776  --UseCycle;
3777  } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3778  UseClass, UseIdx)) {
3779  --UseCycle;
3780  }
3781  }
3782 
3783  return UseCycle;
3784 }
3785 
3786 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3787  const MachineInstr *MI, unsigned Reg,
3788  unsigned &DefIdx, unsigned &Dist) {
3789  Dist = 0;
3790 
3791  MachineBasicBlock::const_iterator I = MI; ++I;
3792  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
3793  assert(II->isInsideBundle() && "Empty bundle?");
3794 
3795  int Idx = -1;
3796  while (II->isInsideBundle()) {
3797  Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3798  if (Idx != -1)
3799  break;
3800  --II;
3801  ++Dist;
3802  }
3803 
3804  assert(Idx != -1 && "Cannot find bundled definition!");
3805  DefIdx = Idx;
3806  return &*II;
3807 }
3808 
3809 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3810  const MachineInstr &MI, unsigned Reg,
3811  unsigned &UseIdx, unsigned &Dist) {
3812  Dist = 0;
3813 
3814  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
3815  assert(II->isInsideBundle() && "Empty bundle?");
3816  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
3817 
3818  // FIXME: This doesn't properly handle multiple uses.
3819  int Idx = -1;
3820  while (II != E && II->isInsideBundle()) {
3821  Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3822  if (Idx != -1)
3823  break;
3824  if (II->getOpcode() != ARM::t2IT)
3825  ++Dist;
3826  ++II;
3827  }
3828 
3829  if (Idx == -1) {
3830  Dist = 0;
3831  return nullptr;
3832  }
3833 
3834  UseIdx = Idx;
3835  return &*II;
3836 }
3837 
3838 /// Return the number of cycles to add to (or subtract from) the static
3839 /// itinerary based on the def opcode and alignment. The caller will ensure that
3840 /// adjusted latency is at least one cycle.
3841 static int adjustDefLatency(const ARMSubtarget &Subtarget,
3842  const MachineInstr &DefMI,
3843  const MCInstrDesc &DefMCID, unsigned DefAlign) {
3844  int Adjust = 0;
3845  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
3846  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3847  // variants are one cycle cheaper.
3848  switch (DefMCID.getOpcode()) {
3849  default: break;
3850  case ARM::LDRrs:
3851  case ARM::LDRBrs: {
3852  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3853  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3854  if (ShImm == 0 ||
3855  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3856  --Adjust;
3857  break;
3858  }
3859  case ARM::t2LDRs:
3860  case ARM::t2LDRBs:
3861  case ARM::t2LDRHs:
3862  case ARM::t2LDRSHs: {
3863  // Thumb2 mode: lsl only.
3864  unsigned ShAmt = DefMI.getOperand(3).getImm();
3865  if (ShAmt == 0 || ShAmt == 2)
3866  --Adjust;
3867  break;
3868  }
3869  }
3870  } else if (Subtarget.isSwift()) {
3871  // FIXME: Properly handle all of the latency adjustments for address
3872  // writeback.
3873  switch (DefMCID.getOpcode()) {
3874  default: break;
3875  case ARM::LDRrs:
3876  case ARM::LDRBrs: {
3877  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3878  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3879  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3880  if (!isSub &&
3881  (ShImm == 0 ||
3882  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3883  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3884  Adjust -= 2;
3885  else if (!isSub &&
3886  ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3887  --Adjust;
3888  break;
3889  }
3890  case ARM::t2LDRs:
3891  case ARM::t2LDRBs:
3892  case ARM::t2LDRHs:
3893  case ARM::t2LDRSHs: {
3894  // Thumb2 mode: lsl only.
3895  unsigned ShAmt = DefMI.getOperand(3).getImm();
3896  if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
3897  Adjust -= 2;
3898  break;
3899  }
3900  }
3901  }
3902 
3903  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
3904  switch (DefMCID.getOpcode()) {
3905  default: break;
3906  case ARM::VLD1q8:
3907  case ARM::VLD1q16:
3908  case ARM::VLD1q32:
3909  case ARM::VLD1q64:
3910  case ARM::VLD1q8wb_fixed:
3911  case ARM::VLD1q16wb_fixed:
3912  case ARM::VLD1q32wb_fixed:
3913  case ARM::VLD1q64wb_fixed:
3914  case ARM::VLD1q8wb_register:
3915  case ARM::VLD1q16wb_register:
3916  case ARM::VLD1q32wb_register:
3917  case ARM::VLD1q64wb_register:
3918  case ARM::VLD2d8:
3919  case ARM::VLD2d16:
3920  case ARM::VLD2d32:
3921  case ARM::VLD2q8:
3922  case ARM::VLD2q16:
3923  case ARM::VLD2q32:
3924  case ARM::VLD2d8wb_fixed:
3925  case ARM::VLD2d16wb_fixed:
3926  case ARM::VLD2d32wb_fixed:
3927  case ARM::VLD2q8wb_fixed:
3928  case ARM::VLD2q16wb_fixed:
3929  case ARM::VLD2q32wb_fixed:
3930  case ARM::VLD2d8wb_register:
3931  case ARM::VLD2d16wb_register:
3932  case ARM::VLD2d32wb_register:
3933  case ARM::VLD2q8wb_register:
3934  case ARM::VLD2q16wb_register:
3935  case ARM::VLD2q32wb_register:
3936  case ARM::VLD3d8:
3937  case ARM::VLD3d16:
3938  case ARM::VLD3d32:
3939  case ARM::VLD1d64T:
3940  case ARM::VLD3d8_UPD:
3941  case ARM::VLD3d16_UPD:
3942  case ARM::VLD3d32_UPD:
3943  case ARM::VLD1d64Twb_fixed:
3944  case ARM::VLD1d64Twb_register:
3945  case ARM::VLD3q8_UPD:
3946  case ARM::VLD3q16_UPD:
3947  case ARM::VLD3q32_UPD:
3948  case ARM::VLD4d8:
3949  case ARM::VLD4d16:
3950  case ARM::VLD4d32:
3951  case ARM::VLD1d64Q:
3952  case ARM::VLD4d8_UPD:
3953  case ARM::VLD4d16_UPD:
3954  case ARM::VLD4d32_UPD:
3955  case ARM::VLD1d64Qwb_fixed:
3956  case ARM::VLD1d64Qwb_register:
3957  case ARM::VLD4q8_UPD:
3958  case ARM::VLD4q16_UPD:
3959  case ARM::VLD4q32_UPD:
3960  case ARM::VLD1DUPq8:
3961  case ARM::VLD1DUPq16:
3962  case ARM::VLD1DUPq32:
3963  case ARM::VLD1DUPq8wb_fixed:
3964  case ARM::VLD1DUPq16wb_fixed:
3965  case ARM::VLD1DUPq32wb_fixed:
3966  case ARM::VLD1DUPq8wb_register:
3967  case ARM::VLD1DUPq16wb_register:
3968  case ARM::VLD1DUPq32wb_register:
3969  case ARM::VLD2DUPd8:
3970  case ARM::VLD2DUPd16:
3971  case ARM::VLD2DUPd32:
3972  case ARM::VLD2DUPd8wb_fixed:
3973  case ARM::VLD2DUPd16wb_fixed:
3974  case ARM::VLD2DUPd32wb_fixed:
3975  case ARM::VLD2DUPd8wb_register:
3976  case ARM::VLD2DUPd16wb_register:
3977  case ARM::VLD2DUPd32wb_register:
3978  case ARM::VLD4DUPd8:
3979  case ARM::VLD4DUPd16:
3980  case ARM::VLD4DUPd32:
3981  case ARM::VLD4DUPd8_UPD:
3982  case ARM::VLD4DUPd16_UPD:
3983  case ARM::VLD4DUPd32_UPD:
3984  case ARM::VLD1LNd8:
3985  case ARM::VLD1LNd16:
3986  case ARM::VLD1LNd32:
3987  case ARM::VLD1LNd8_UPD:
3988  case ARM::VLD1LNd16_UPD:
3989  case ARM::VLD1LNd32_UPD:
3990  case ARM::VLD2LNd8:
3991  case ARM::VLD2LNd16:
3992  case ARM::VLD2LNd32:
3993  case ARM::VLD2LNq16:
3994  case ARM::VLD2LNq32:
3995  case ARM::VLD2LNd8_UPD:
3996  case ARM::VLD2LNd16_UPD:
3997  case ARM::VLD2LNd32_UPD:
3998  case ARM::VLD2LNq16_UPD:
3999  case ARM::VLD2LNq32_UPD:
4000  case ARM::VLD4LNd8:
4001  case ARM::VLD4LNd16:
4002  case ARM::VLD4LNd32:
4003  case ARM::VLD4LNq16:
4004  case ARM::VLD4LNq32:
4005  case ARM::VLD4LNd8_UPD:
4006  case ARM::VLD4LNd16_UPD:
4007  case ARM::VLD4LNd32_UPD:
4008  case ARM::VLD4LNq16_UPD:
4009  case ARM::VLD4LNq32_UPD:
4010  // If the address is not 64-bit aligned, the latencies of these
4011  // instructions increase by one.
4012  ++Adjust;
4013  break;
4014  }
4015  }
4016  return Adjust;
4017 }
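// Illustration of the adjustments above (following directly from the code): on
// Swift an LDRrs whose offset register uses a non-subtracting lsl #2 shift gets
// Adjust = -2, and any of the listed VLDn forms loading from an address that is
// not 64-bit aligned gets Adjust = +1.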
4018 
4019 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4020  const MachineInstr &DefMI,
4021  unsigned DefIdx,
4022  const MachineInstr &UseMI,
4023  unsigned UseIdx) const {
4024  // No operand latency. The caller may fall back to getInstrLatency.
4025  if (!ItinData || ItinData->isEmpty())
4026  return -1;
4027 
4028  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4029  unsigned Reg = DefMO.getReg();
4030 
4031  const MachineInstr *ResolvedDefMI = &DefMI;
4032  unsigned DefAdj = 0;
4033  if (DefMI.isBundle())
4034  ResolvedDefMI =
4035  getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4036  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4037  ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4038  return 1;
4039  }
4040 
4041  const MachineInstr *ResolvedUseMI = &UseMI;
4042  unsigned UseAdj = 0;
4043  if (UseMI.isBundle()) {
4044  ResolvedUseMI =
4045  getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4046  if (!ResolvedUseMI)
4047  return -1;
4048  }
4049 
4050  return getOperandLatencyImpl(
4051  ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4052  Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4053 }
4054 
4055 int ARMBaseInstrInfo::getOperandLatencyImpl(
4056  const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4057  unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4058  const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4059  unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4060  if (Reg == ARM::CPSR) {
4061  if (DefMI.getOpcode() == ARM::FMSTAT) {
4062  // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4063  return Subtarget.isLikeA9() ? 1 : 20;
4064  }
4065 
4066  // CPSR set and branch can be paired in the same cycle.
4067  if (UseMI.isBranch())
4068  return 0;
4069 
4070  // Otherwise it takes the instruction latency (generally one).
4071  unsigned Latency = getInstrLatency(ItinData, DefMI);
4072 
4073  // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4074  // its uses. Instructions which are otherwise scheduled between them may
4075  // incur a code size penalty (not able to use the CPSR setting 16-bit
4076  // instructions).
4077  if (Latency > 0 && Subtarget.isThumb2()) {
4078  const MachineFunction *MF = DefMI.getParent()->getParent();
4079  // FIXME: Use Function::optForSize().
4080  if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4081  --Latency;
4082  }
4083  return Latency;
4084  }
4085 
4086  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4087  return -1;
4088 
4089  unsigned DefAlign = DefMI.hasOneMemOperand()
4090  ? (*DefMI.memoperands_begin())->getAlignment()
4091  : 0;
4092  unsigned UseAlign = UseMI.hasOneMemOperand()
4093  ? (*UseMI.memoperands_begin())->getAlignment()
4094  : 0;
4095 
4096  // Get the itinerary's latency if possible, and handle variable_ops.
4097  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
4098  UseIdx, UseAlign);
4099  // Unable to find operand latency. The caller may resort to getInstrLatency.
4100  if (Latency < 0)
4101  return Latency;
4102 
4103  // Adjust for IT block position.
4104  int Adj = DefAdj + UseAdj;
4105 
4106  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4107  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4108  if (Adj >= 0 || (int)Latency > -Adj) {
4109  return Latency + Adj;
4110  }
4111  // Return the itinerary latency, which may be zero but not less than zero.
4112  return Latency;
4113 }
4114 
4115 int
4116 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4117  SDNode *DefNode, unsigned DefIdx,
4118  SDNode *UseNode, unsigned UseIdx) const {
4119  if (!DefNode->isMachineOpcode())
4120  return 1;
4121 
4122  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4123 
4124  if (isZeroCost(DefMCID.Opcode))
4125  return 0;
4126 
4127  if (!ItinData || ItinData->isEmpty())
4128  return DefMCID.mayLoad() ? 3 : 1;
4129 
4130  if (!UseNode->isMachineOpcode()) {
4131  int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4132  int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4133  int Threshold = 1 + Adj;
4134  return Latency <= Threshold ? 1 : Latency - Adj;
4135  }
4136 
4137  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4138  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
4139  unsigned DefAlign = !DefMN->memoperands_empty()
4140  ? (*DefMN->memoperands_begin())->getAlignment() : 0;
4141  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
4142  unsigned UseAlign = !UseMN->memoperands_empty()
4143  ? (*UseMN->memoperands_begin())->getAlignment() : 0;
4144  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4145  UseMCID, UseIdx, UseAlign);
4146 
4147  if (Latency > 1 &&
4148  (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4149  Subtarget.isCortexA7())) {
4150  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4151  // variants are one cycle cheaper.
4152  switch (DefMCID.getOpcode()) {
4153  default: break;
4154  case ARM::LDRrs:
4155  case ARM::LDRBrs: {
4156  unsigned ShOpVal =
4157  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4158  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4159  if (ShImm == 0 ||
4160  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4161  --Latency;
4162  break;
4163  }
4164  case ARM::t2LDRs:
4165  case ARM::t2LDRBs:
4166  case ARM::t2LDRHs:
4167  case ARM::t2LDRSHs: {
4168  // Thumb2 mode: lsl only.
4169  unsigned ShAmt =
4170  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4171  if (ShAmt == 0 || ShAmt == 2)
4172  --Latency;
4173  break;
4174  }
4175  }
4176  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4177  // FIXME: Properly handle all of the latency adjustments for address
4178  // writeback.
4179  switch (DefMCID.getOpcode()) {
4180  default: break;
4181  case ARM::LDRrs:
4182  case ARM::LDRBrs: {
4183  unsigned ShOpVal =
4184  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4185  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4186  if (ShImm == 0 ||
4187  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4188  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4189  Latency -= 2;
4190  else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4191  --Latency;
4192  break;
4193  }
4194  case ARM::t2LDRs:
4195  case ARM::t2LDRBs:
4196  case ARM::t2LDRHs:
4197  case ARM::t2LDRSHs:
4198  // Thumb2 mode: lsl 0-3 only.
4199  Latency -= 2;
4200  break;
4201  }
4202  }
4203 
4204  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4205  switch (DefMCID.getOpcode()) {
4206  default: break;
4207  case ARM::VLD1q8:
4208  case ARM::VLD1q16:
4209  case ARM::VLD1q32:
4210  case ARM::VLD1q64:
4211  case ARM::VLD1q8wb_register:
4212  case ARM::VLD1q16wb_register:
4213  case ARM::VLD1q32wb_register:
4214  case ARM::VLD1q64wb_register:
4215  case ARM::VLD1q8wb_fixed:
4216  case ARM::VLD1q16wb_fixed:
4217  case ARM::VLD1q32wb_fixed:
4218  case ARM::VLD1q64wb_fixed:
4219  case ARM::VLD2d8:
4220  case ARM::VLD2d16:
4221  case ARM::VLD2d32:
4222  case ARM::VLD2q8Pseudo:
4223  case ARM::VLD2q16Pseudo:
4224  case ARM::VLD2q32Pseudo:
4225  case ARM::VLD2d8wb_fixed:
4226  case ARM::VLD2d16wb_fixed:
4227  case ARM::VLD2d32wb_fixed:
4228  case ARM::VLD2q8PseudoWB_fixed:
4229  case ARM::VLD2q16PseudoWB_fixed:
4230  case ARM::VLD2q32PseudoWB_fixed:
4231  case ARM::VLD2d8wb_register:
4232  case ARM::VLD2d16wb_register:
4233  case ARM::VLD2d32wb_register:
4234  case ARM::VLD2q8PseudoWB_register:
4235  case ARM::VLD2q16PseudoWB_register:
4236  case ARM::VLD2q32PseudoWB_register:
4237  case ARM::VLD3d8Pseudo:
4238  case ARM::VLD3d16Pseudo:
4239  case ARM::VLD3d32Pseudo:
4240  case ARM::VLD1d8TPseudo:
4241  case ARM::VLD1d16TPseudo:
4242  case ARM::VLD1d32TPseudo:
4243  case ARM::VLD1d64TPseudo:
4244  case ARM::VLD1d64TPseudoWB_fixed:
4245  case ARM::VLD1d64TPseudoWB_register:
4246  case ARM::VLD3d8Pseudo_UPD:
4247  case ARM::VLD3d16Pseudo_UPD:
4248  case ARM::VLD3d32Pseudo_UPD:
4249  case ARM::VLD3q8Pseudo_UPD:
4250  case ARM::VLD3q16Pseudo_UPD:
4251  case ARM::VLD3q32Pseudo_UPD:
4252  case ARM::VLD3q8oddPseudo:
4253  case ARM::VLD3q16oddPseudo:
4254  case ARM::VLD3q32oddPseudo:
4255  case ARM::VLD3q8oddPseudo_UPD:
4256  case ARM::VLD3q16oddPseudo_UPD:
4257  case ARM::VLD3q32oddPseudo_UPD:
4258  case ARM::VLD4d8Pseudo:
4259  case ARM::VLD4d16Pseudo:
4260  case ARM::VLD4d32Pseudo:
4261  case ARM::VLD1d8QPseudo:
4262  case ARM::VLD1d16QPseudo:
4263  case ARM::VLD1d32QPseudo:
4264  case ARM::VLD1d64QPseudo:
4265  case ARM::VLD1d64QPseudoWB_fixed:
4266  case ARM::VLD1d64QPseudoWB_register:
4267  case ARM::VLD1q8HighQPseudo:
4268  case ARM::VLD1q8LowQPseudo_UPD:
4269  case ARM::VLD1q8HighTPseudo:
4270  case ARM::VLD1q8LowTPseudo_UPD:
4271  case ARM::VLD1q16HighQPseudo:
4272  case ARM::VLD1q16LowQPseudo_UPD:
4273  case ARM::VLD1q16HighTPseudo:
4274  case ARM::VLD1q16LowTPseudo_UPD:
4275  case ARM::VLD1q32HighQPseudo:
4276  case ARM::VLD1q32LowQPseudo_UPD:
4277  case ARM::VLD1q32HighTPseudo:
4278  case ARM::VLD1q32LowTPseudo_UPD:
4279  case ARM::VLD1q64HighQPseudo:
4280  case ARM::VLD1q64LowQPseudo_UPD:
4281  case ARM::VLD1q64HighTPseudo:
4282  case ARM::VLD1q64LowTPseudo_UPD:
4283  case ARM::VLD4d8Pseudo_UPD:
4284  case ARM::VLD4d16Pseudo_UPD:
4285  case ARM::VLD4d32Pseudo_UPD:
4286  case ARM::VLD4q8Pseudo_UPD:
4287  case ARM::VLD4q16Pseudo_UPD:
4288  case ARM::VLD4q32Pseudo_UPD:
4289  case ARM::VLD4q8oddPseudo:
4290  case ARM::VLD4q16oddPseudo:
4291  case ARM::VLD4q32oddPseudo:
4292  case ARM::VLD4q8oddPseudo_UPD:
4293  case ARM::VLD4q16oddPseudo_UPD:
4294  case ARM::VLD4q32oddPseudo_UPD:
4295  case ARM::VLD1DUPq8:
4296  case ARM::VLD1DUPq16:
4297  case ARM::VLD1DUPq32:
4298  case ARM::VLD1DUPq8wb_fixed:
4299  case ARM::VLD1DUPq16wb_fixed:
4300  case ARM::VLD1DUPq32wb_fixed:
4301  case ARM::VLD1DUPq8wb_register:
4302  case ARM::VLD1DUPq16wb_register:
4303  case ARM::VLD1DUPq32wb_register:
4304  case ARM::VLD2DUPd8:
4305  case ARM::VLD2DUPd16:
4306  case ARM::VLD2DUPd32:
4307  case ARM::VLD2DUPd8wb_fixed:
4308  case ARM::VLD2DUPd16wb_fixed:
4309  case ARM::VLD2DUPd32wb_fixed:
4310  case ARM::VLD2DUPd8wb_register:
4311  case ARM::VLD2DUPd16wb_register:
4312  case ARM::VLD2DUPd32wb_register:
4313  case ARM::VLD2DUPq8EvenPseudo:
4314  case ARM::VLD2DUPq8OddPseudo:
4315  case ARM::VLD2DUPq16EvenPseudo:
4316  case ARM::VLD2DUPq16OddPseudo:
4317  case ARM::VLD2DUPq32EvenPseudo:
4318  case ARM::VLD2DUPq32OddPseudo:
4319  case ARM::VLD3DUPq8EvenPseudo:
4320  case ARM::VLD3DUPq8OddPseudo:
4321  case ARM::VLD3DUPq16EvenPseudo:
4322  case ARM::VLD3DUPq16OddPseudo:
4323  case ARM::VLD3DUPq32EvenPseudo:
4324  case ARM::VLD3DUPq32OddPseudo:
4325  case ARM::VLD4DUPd8Pseudo:
4326  case ARM::VLD4DUPd16Pseudo:
4327  case ARM::VLD4DUPd32Pseudo:
4328  case ARM::VLD4DUPd8Pseudo_UPD:
4329  case ARM::VLD4DUPd16Pseudo_UPD:
4330  case ARM::VLD4DUPd32Pseudo_UPD:
4331  case ARM::VLD4DUPq8EvenPseudo:
4332  case ARM::VLD4DUPq8OddPseudo:
4333  case ARM::VLD4DUPq16EvenPseudo:
4334  case ARM::VLD4DUPq16OddPseudo:
4335  case ARM::VLD4DUPq32EvenPseudo:
4336  case ARM::VLD4DUPq32OddPseudo:
4337  case ARM::VLD1LNq8Pseudo:
4338  case ARM::VLD1LNq16Pseudo:
4339  case ARM::VLD1LNq32Pseudo:
4340  case ARM::VLD1LNq8Pseudo_UPD:
4341  case ARM::VLD1LNq16Pseudo_UPD:
4342  case ARM::VLD1LNq32Pseudo_UPD:
4343  case ARM::VLD2LNd8Pseudo:
4344  case ARM::VLD2LNd16Pseudo:
4345  case ARM::VLD2LNd32Pseudo:
4346  case ARM::VLD2LNq16Pseudo:
4347  case ARM::VLD2LNq32Pseudo:
4348  case ARM::VLD2LNd8Pseudo_UPD:
4349  case ARM::VLD2LNd16Pseudo_UPD:
4350  case ARM::VLD2LNd32Pseudo_UPD:
4351  case ARM::VLD2LNq16Pseudo_UPD:
4352  case ARM::VLD2LNq32Pseudo_UPD:
4353  case ARM::VLD4LNd8Pseudo:
4354  case ARM::VLD4LNd16Pseudo:
4355  case ARM::VLD4LNd32Pseudo:
4356  case ARM::VLD4LNq16Pseudo:
4357  case ARM::VLD4LNq32Pseudo:
4358  case ARM::VLD4LNd8Pseudo_UPD:
4359  case ARM::VLD4LNd16Pseudo_UPD:
4360  case ARM::VLD4LNd32Pseudo_UPD:
4361  case ARM::VLD4LNq16Pseudo_UPD:
4362  case ARM::VLD4LNq32Pseudo_UPD:
4363  // If the address is not 64-bit aligned, the latencies of these
4364  // instructions increase by one.
4365  ++Latency;
4366  break;
4367  }
4368 
4369  return Latency;
4370 }
4371 
4372 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4373  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4374  MI.isImplicitDef())
4375  return 0;
4376 
4377  if (MI.isBundle())
4378  return 0;
4379 
4380  const MCInstrDesc &MCID = MI.getDesc();
4381 
4382  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4383  !Subtarget.cheapPredicableCPSRDef())) {
4384  // When predicated, CPSR is an additional source operand for CPSR updating
4385  // instructions; this apparently increases their latencies.
4386  return 1;
4387  }
4388  return 0;
4389 }
4390 
4391 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4392  const MachineInstr &MI,
4393  unsigned *PredCost) const {
4394  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4395  MI.isImplicitDef())
4396  return 1;
4397 
4398  // An instruction scheduler typically runs on unbundled instructions, however
4399  // other passes may query the latency of a bundled instruction.
4400  if (MI.isBundle()) {
4401  unsigned Latency = 0;
4402  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4403  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4404  while (++I != E && I->isInsideBundle()) {
4405  if (I->getOpcode() != ARM::t2IT)
4406  Latency += getInstrLatency(ItinData, *I, PredCost);
4407  }
4408  return Latency;
4409  }
4410 
4411  const MCInstrDesc &MCID = MI.getDesc();
4412  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4413  !Subtarget.cheapPredicableCPSRDef()))) {
4414  // When predicated, CPSR is an additional source operand for CPSR updating
4415  // instructions; this apparently increases their latencies.
4416  *PredCost = 1;
4417  }
4418  // Be sure to call getStageLatency for an empty itinerary in case it has a
4419  // valid MinLatency property.
4420  if (!ItinData)
4421  return MI.mayLoad() ? 3 : 1;
4422 
4423  unsigned Class = MCID.getSchedClass();
4424 
4425  // For instructions with variable uops, use uops as latency.
4426  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4427  return getNumMicroOps(ItinData, MI);
4428 
4429  // For the common case, fall back on the itinerary's latency.
4430  unsigned Latency = ItinData->getStageLatency(Class);
4431 
4432  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4433  unsigned DefAlign =
4434  MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
4435  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4436  if (Adj >= 0 || (int)Latency > -Adj) {
4437  return Latency + Adj;
4438  }
4439  return Latency;
4440 }
4441 
4442 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4443  SDNode *Node) const {
4444  if (!Node->isMachineOpcode())
4445  return 1;
4446 
4447  if (!ItinData || ItinData->isEmpty())
4448  return 1;
4449 
4450  unsigned Opcode = Node->getMachineOpcode();
4451  switch (Opcode) {
4452  default:
4453  return ItinData->getStageLatency(get(Opcode).getSchedClass());
4454  case ARM::VLDMQIA:
4455  case ARM::VSTMQIA:
4456  return 2;
4457  }
4458 }
4459 
4460 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4461  const MachineRegisterInfo *MRI,
4462  const MachineInstr &DefMI,
4463  unsigned DefIdx,
4464  const MachineInstr &UseMI,
4465  unsigned UseIdx) const {
4466  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4467  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4468  if (Subtarget.nonpipelinedVFP() &&
4469  (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4470  return true;
4471 
4472  // Hoist VFP / NEON instructions with 4 or higher latency.
4473  unsigned Latency =
4474  SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4475  if (Latency <= 3)
4476  return false;
4477  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4478  UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4479 }
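// In other words, a VFP/NEON def whose computed operand latency to its user is
// at least 4 cycles is reported as high-latency (a hoisting candidate), and on
// subtargets with a non-pipelined VFP unit any VFP-domain dependence qualifies.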
4480 
4481 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4482  const MachineInstr &DefMI,
4483  unsigned DefIdx) const {
4484  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4485  if (!ItinData || ItinData->isEmpty())
4486  return false;
4487 
4488  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4489  if (DDomain == ARMII::DomainGeneral) {
4490  unsigned DefClass = DefMI.getDesc().getSchedClass();
4491  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4492  return (DefCycle != -1 && DefCycle <= 2);
4493  }
4494  return false;
4495 }
4496 
4497 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4498  StringRef &ErrInfo) const {
4499  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4500  ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4501  return false;
4502  }
4503  return true;
4504 }
4505 
4506 // LoadStackGuard has so far only been implemented for MachO. Different code
4507 // sequence is needed for other targets.
4508 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4509  unsigned LoadImmOpc,
4510  unsigned LoadOpc) const {
4511  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4512  "ROPI/RWPI not currently supported with stack guard");
4513 
4514  MachineBasicBlock &MBB = *MI->getParent();
4515  DebugLoc DL = MI->getDebugLoc();
4516  unsigned Reg = MI->getOperand(0).getReg();
4517  const GlobalValue *GV =
4518  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4519  MachineInstrBuilder MIB;
4520 
4521  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4522  .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
4523 
4524  if (Subtarget.isGVIndirectSymbol(GV)) {
4525  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4526  MIB.addReg(Reg, RegState::Kill).addImm(0);
4527  auto Flags = MachineMemOperand::MOLoad |
4528  MachineMemOperand::MODereferenceable |
4529  MachineMemOperand::MOInvariant;
4530  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4531  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
4532  MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4533  }
4534 
4535  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4536  MIB.addReg(Reg, RegState::Kill)
4537  .addImm(0)
4538  .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
4539  .add(predOps(ARMCC::AL));
4540 }
4541 
4542 bool
4543 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4544  unsigned &AddSubOpc,
4545  bool &NegAcc, bool &HasLane) const {
4546  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4547  if (I == MLxEntryMap.end())
4548  return false;
4549 
4550  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4551  MulOpc = Entry.MulOpc;
4552  AddSubOpc = Entry.AddSubOpc;
4553  NegAcc = Entry.NegAcc;
4554  HasLane = Entry.HasLane;
4555  return true;
4556 }
4557 
4558 //===----------------------------------------------------------------------===//
4559 // Execution domains.
4560 //===----------------------------------------------------------------------===//
4561 //
4562 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4563 // and some can go down both. The vmov instructions go down the VFP pipeline,
4564 // but they can be changed to vorr equivalents that are executed by the NEON
4565 // pipeline.
4566 //
4567 // We use the following execution domain numbering:
4568 //
4569 enum ARMExeDomain {
4570  ExeGeneric = 0,
4571  ExeVFP = 1,
4572  ExeNEON = 2
4573 };
4574 
4575 //
4576 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4577 //
4578 std::pair<uint16_t, uint16_t>
4579 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4580  // If we don't have access to NEON instructions then we won't be able
4581  // to swizzle anything to the NEON domain. Check to make sure.
4582  if (Subtarget.hasNEON()) {
4583  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4584  // if they are not predicated.
4585  if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4586  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4587 
4588  // CortexA9 is particularly picky about mixing the two and wants these
4589  // converted.
4590  if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4591  (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4592  MI.getOpcode() == ARM::VMOVS))
4593  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4594  }
4595  // No other instructions can be swizzled, so just determine their domain.
4596  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4597 
4598  if (Domain & ARMII::DomainNEON)
4599  return std::make_pair(ExeNEON, 0);
4600 
4601  // Certain instructions can go either way on Cortex-A8.
4602  // Treat them as NEON instructions.
4603  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4604  return std::make_pair(ExeNEON, 0);
4605 
4606  if (Domain & ARMII::DomainVFP)
4607  return std::make_pair(ExeVFP, 0);
4608 
4609  return std::make_pair(ExeGeneric, 0);
4610 }
4611 
4612 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4613  unsigned SReg, unsigned &Lane) {
4614  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4615  Lane = 0;
4616 
4617  if (DReg != ARM::NoRegister)
4618  return DReg;
4619 
4620  Lane = 1;
4621  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4622 
4623  assert(DReg && "S-register with no D super-register?");
4624  return DReg;
4625 }
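// For example, S4 is ssub_0 of D2 and maps to D2 with Lane = 0, while S1 is
// ssub_1 of D0 and maps to D0 with Lane = 1.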
4626 
4627 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4628 /// set ImplicitSReg to a register number that must be marked as implicit-use or
4629 /// zero if no register needs to be defined as implicit-use.
4630 ///
4631 /// If the function cannot determine if an SPR should be marked implicit use or
4632 /// not, it returns false.
4633 ///
4634 /// This function handles cases where an instruction is being modified from taking
4635 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4636 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4637 /// lane of the DPR).
4638 ///
4639 /// If the other SPR is defined, an implicit-use of it should be added.
4640 /// Otherwise (including the case where the DPR itself is defined), it should not.
4641 ///
4642 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4643  MachineInstr &MI, unsigned DReg,
4644  unsigned Lane, unsigned &ImplicitSReg) {
4645  // If the DPR is defined or used already, the other SPR lane will be chained
4646  // correctly, so there is nothing to be done.
4647  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4648  ImplicitSReg = 0;
4649  return true;
4650  }
4651 
4652  // Otherwise we need to go searching to see if the SPR is set explicitly.
4653  ImplicitSReg = TRI->getSubReg(DReg,
4654  (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4655  MachineBasicBlock::LivenessQueryResult LQR =
4656  MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4657 
4658  if (LQR == MachineBasicBlock::LQR_Live)
4659  return true;
4660  else if (LQR == MachineBasicBlock::LQR_Unknown)
4661  return false;
4662 
4663  // If the register is known not to be live, there is no need to add an
4664  // implicit-use.
4665  ImplicitSReg = 0;
4666  return true;
4667 }
4668 
4669 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4670  unsigned Domain) const {
4671  unsigned DstReg, SrcReg, DReg;
4672  unsigned Lane;
4673  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4674  const TargetRegisterInfo *TRI = &getRegisterInfo();
4675  switch (MI.getOpcode()) {
4676  default:
4677  llvm_unreachable("cannot handle opcode!");
4678  break;
4679  case ARM::VMOVD:
4680  if (Domain != ExeNEON)
4681  break;
4682 
4683  // Zap the predicate operands.
4684  assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4685 
4686  // Make sure we've got NEON instructions.
4687  assert(Subtarget.hasNEON() && "VORRd requires NEON");
4688 
4689  // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4690  DstReg = MI.getOperand(0).getReg();
4691  SrcReg = MI.getOperand(1).getReg();
4692 
4693  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4694  MI.RemoveOperand(i - 1);
4695 
4696  // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4697  MI.setDesc(get(ARM::VORRd));
4698  MIB.addReg(DstReg, RegState::Define)
4699  .addReg(SrcReg)
4700  .addReg(SrcReg)
4701  .add(predOps(ARMCC::AL));
4702  break;
4703  case ARM::VMOVRS:
4704  if (Domain != ExeNEON)
4705  break;
4706  assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
4707 
4708  // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
4709  DstReg = MI.getOperand(0).getReg();
4710  SrcReg = MI.getOperand(1).getReg();
4711 
4712  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4713  MI.RemoveOperand(i - 1);
4714 
4715  DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
4716 
4717  // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
4718  // Note that DSrc has been widened and the other lane may be undef, which
4719  // contaminates the entire register.
4720  MI.setDesc(get(ARM::VGETLNi32));
4721  MIB.addReg(DstReg, RegState::Define)
4722  .addReg(DReg, RegState::Undef)
4723  .addImm(Lane)
4724  .add(predOps(ARMCC::AL));
4725 
4726  // The old source should be an implicit use, otherwise we might think it
4727  // was dead before here.
4728  MIB.addReg(SrcReg, RegState::Implicit);
4729  break;
4730  case ARM::VMOVSR: {
4731  if (Domain != ExeNEON)
4732  break;
4733  assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
4734 
4735  // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
4736  DstReg = MI.getOperand(0).getReg();
4737  SrcReg = MI.getOperand(1).getReg();
4738 
4739  DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
4740 
4741  unsigned ImplicitSReg;
4742  if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
4743  break;
4744 
4745  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4746  MI.RemoveOperand(i - 1);
4747 
4748  // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
4749  // Again DDst may be undefined at the beginning of this instruction.
4750  MI.setDesc(get(ARM::VSETLNi32));
4751  MIB.addReg(DReg, RegState::Define)
4752  .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
4753  .addReg(SrcReg)
4754  .addImm(Lane)
4755  .add(predOps(ARMCC::AL));
4756 
4757  // The narrower destination must be marked as set to keep previous chains
4758  // in place.
4759  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4760  if (ImplicitSReg != 0)
4761  MIB.addReg(ImplicitSReg, RegState::Implicit);
4762  break;
4763  }
4764  case ARM::VMOVS: {
4765  if (Domain != ExeNEON)
4766  break;
4767 
4768  // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4769  DstReg = MI.getOperand(0).getReg();
4770  SrcReg = MI.getOperand(1).getReg();
4771 
4772  unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4773  DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4774  DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4775 
4776  unsigned ImplicitSReg;
4777  if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4778  break;
4779 
4780  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4781  MI.RemoveOperand(i - 1);
4782 
4783  if (DSrc == DDst) {
4784  // Destination can be:
4785  // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4786  MI.setDesc(get(ARM::VDUPLN32d));
4787  MIB.addReg(DDst, RegState::Define)
4788  .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
4789  .addImm(SrcLane)
4790  .add(predOps(ARMCC::AL));
4791 
4792  // Neither the source nor the destination is naturally represented any
4793  // more, so add them in manually.
4794  MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4795  MIB.addReg(SrcReg, RegState::Implicit);
4796  if (ImplicitSReg != 0)
4797  MIB.addReg(ImplicitSReg, RegState::Implicit);
4798  break;
4799  }
4800 
4801  // In general there's no single instruction that can perform an S <-> S
4802  // move in NEON space, but a pair of VEXT instructions *can* do the
4803  // job. It turns out that the VEXTs needed will only use DSrc once, with
4804  // the position based purely on the combination of lane-0 and lane-1
4805  // involved. For example
4806  // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
4807  // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
4808  // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
4809  // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
4810  //
4811  // Pattern of the MachineInstrs is:
4812  // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4813  MachineInstrBuilder NewMIB;
4814  NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
4815  DDst);
4816 
4817  // On the first instruction, both DSrc and DDst may be undef if present.
4818  // Specifically when the original instruction didn't have them as an
4819  // <imp-use>.
4820  unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
4821  bool CurUndef = !MI.readsRegister(CurReg, TRI);
4822  NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4823 
4824  CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
4825  CurUndef = !MI.readsRegister(CurReg, TRI);
4826  NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
4827  .addImm(1)
4828  .add(predOps(ARMCC::AL));
4829 
4830  if (SrcLane == DstLane)
4831  NewMIB.addReg(SrcReg, RegState::Implicit);
4832 
4833  MI.setDesc(get(ARM::VEXTd32));
4834  MIB.addReg(DDst, RegState::Define);
4835 
4836  // On the second instruction, DDst has definitely been defined above, so
4837  // it is not undef. DSrc, if present, can be undef as above.
4838  CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
4839  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4840  MIB.addReg(CurReg, getUndefRegState(CurUndef));
4841 
4842  CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
4843  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4844  MIB.addReg(CurReg, getUndefRegState(CurUndef))
4845  .addImm(1)
4846  .add(predOps(ARMCC::AL));
4847 
4848  if (SrcLane != DstLane)
4849  MIB.addReg(SrcReg, RegState::Implicit);
4850 
4851  // As before, the original destination is no longer represented, add it
4852  // implicitly.
4853  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4854  if (ImplicitSReg != 0)
4855  MIB.addReg(ImplicitSReg, RegState::Implicit);
4856  break;
4857  }
4858  }
4859 }
4860 
4861 //===----------------------------------------------------------------------===//
4862 // Partial register updates
4863 //===----------------------------------------------------------------------===//
4864 //
4865 // Swift renames NEON registers with 64-bit granularity. That means any
4866 // instruction writing an S-reg implicitly reads the containing D-reg. The
4867 // problem is mostly avoided by translating f32 operations to v2f32 operations
4868 // on D-registers, but f32 loads are still a problem.
4869 //
4870 // These instructions can load an f32 into a NEON register:
4871 //
4872 // VLDRS - Only writes S, partial D update.
4873 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4874 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4875 //
4876 // FCONSTD can be used as a dependency-breaking instruction.
4877 unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
4878  const MachineInstr &MI, unsigned OpNum,
4879  const TargetRegisterInfo *TRI) const {
4880  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
4881  if (!PartialUpdateClearance)
4882  return 0;
4883 
4884  assert(TRI && "Need TRI instance");
4885 
4886  const MachineOperand &MO = MI.getOperand(OpNum);
4887  if (MO.readsReg())
4888  return 0;
4889  unsigned Reg = MO.getReg();
4890  int UseOp = -1;
4891 
4892  switch (MI.getOpcode()) {
4893  // Normal instructions writing only an S-register.
4894  case ARM::VLDRS:
4895  case ARM::FCONSTS:
4896  case ARM::VMOVSR:
4897  case ARM::VMOVv8i8:
4898  case ARM::VMOVv4i16:
4899  case ARM::VMOVv2i32:
4900  case ARM::VMOVv2f32:
4901  case ARM::VMOVv1i64:
4902  UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
4903  break;
4904 
4905  // Explicitly reads the dependency.
4906  case ARM::VLD1LNd32:
4907  UseOp = 3;
4908  break;
4909  default:
4910  return 0;
4911  }
4912 
4913  // If this instruction actually reads a value from Reg, there is no unwanted
4914  // dependency.
4915  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
4916  return 0;
4917 
4918  // We must be able to clobber the whole D-reg.
4919  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4920  // Virtual register must be a def undef foo:ssub_0 operand.
4921  if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
4922  return 0;
4923  } else if (ARM::SPRRegClass.contains(Reg)) {
4924  // Physical register: MI must define the full D-reg.
4925  unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4926  &ARM::DPRRegClass);
4927  if (!DReg || !MI.definesRegister(DReg, TRI))
4928  return 0;
4929  }
4930 
4931  // MI has an unwanted D-register dependency.
4932  // Avoid defs in the previous N instructions.
4933  return PartialUpdateClearance;
4934 }
4935 
4936 // Break a partial register dependency after getPartialRegUpdateClearance
4937 // returned non-zero.
4938 void ARMBaseInstrInfo::breakPartialRegDependency(
4939  MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
4940  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
4941  assert(TRI && "Need TRI instance");
4942 
4943  const MachineOperand &MO = MI.getOperand(OpNum);
4944  unsigned Reg = MO.getReg();
4945  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
4946  "Can't break virtual register dependencies.");
4947  unsigned DReg = Reg;
4948 
4949  // If MI defines an S-reg, find the corresponding D super-register.
4950  if (ARM::SPRRegClass.contains(Reg)) {
4951  DReg = ARM::D0 + (Reg - ARM::S0) / 2;
4952  assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
4953  }
4954 
4955  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
4956  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
4957 
4958  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
4959  // the full D-register by loading the same value to both lanes. The
4960  // instruction is micro-coded with 2 uops, so don't do this until we can
4961  // properly schedule micro-coded instructions. The dispatcher stalls cause
4962  // too big regressions.
4963 
4964  // Insert the dependency-breaking FCONSTD before MI.
4965  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
4966  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
4967  .addImm(96)
4968  .add(predOps(ARMCC::AL));
4969  MI.addRegisterKilled(DReg, TRI, true);
4970 }
4971 
4972 bool ARMBaseInstrInfo::hasNOP() const {
4973  return Subtarget.getFeatureBits()[ARM::HasV6KOps];
4974 }
4975 
4976 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
4977  if (MI->getNumOperands() < 4)
4978  return true;
4979  unsigned ShOpVal = MI->getOperand(3).getImm();
4980  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
4981  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
4982  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
4983  ((ShImm == 1 || ShImm == 2) &&
4984  ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
4985  return true;
4986 
4987  return false;
4988 }
4989 
4990 bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
4991  const MachineInstr &MI, unsigned DefIdx,
4992  SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
4993  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4994  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
4995 
4996  switch (MI.getOpcode()) {
4997  case ARM::VMOVDRR:
4998  // dX = VMOVDRR rY, rZ
4999  // is the same as:
5000  // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5001  // Populate the InputRegs accordingly.
5002  // rY
5003  const MachineOperand *MOReg = &MI.getOperand(1);
5004  if (!MOReg->isUndef())
5005  InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5006  MOReg->getSubReg(), ARM::ssub_0));
5007  // rZ
5008  MOReg = &MI.getOperand(2);
5009  if (!MOReg->isUndef())
5010  InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5011  MOReg->getSubReg(), ARM::ssub_1));
5012  return true;
5013  }
5014  llvm_unreachable("Target dependent opcode missing");
5015 }
5016 
5017 bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5018  const MachineInstr &MI, unsigned DefIdx,
5019  RegSubRegPairAndIdx &InputReg) const {
5020  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5021  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5022 
5023  switch (MI.getOpcode()) {
5024  case ARM::VMOVRRD:
5025  // rX, rY = VMOVRRD dZ
5026  // is the same as:
5027  // rX = EXTRACT_SUBREG dZ, ssub_0
5028  // rY = EXTRACT_SUBREG dZ, ssub_1
5029  const MachineOperand &MOReg = MI.getOperand(2);
5030  if (MOReg.isUndef())
5031  return false;
5032  InputReg.Reg = MOReg.getReg();
5033  InputReg.SubReg = MOReg.getSubReg();
5034  InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5035  return true;
5036  }
5037  llvm_unreachable("Target dependent opcode missing");
5038 }
5039 
5040 bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5041  const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5042  RegSubRegPairAndIdx &InsertedReg) const {
5043  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5044  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5045 
5046  switch (MI.getOpcode()) {
5047  case ARM::VSETLNi32:
5048  // dX = VSETLNi32 dY, rZ, imm
5049  const MachineOperand &MOBaseReg = MI.getOperand(1);
5050  const MachineOperand &MOInsertedReg = MI.getOperand(2);
5051  if (MOInsertedReg.isUndef())
5052  return false;
5053  const MachineOperand &MOIndex = MI.getOperand(3);
5054  BaseReg.Reg = MOBaseReg.getReg();
5055  BaseReg.SubReg = MOBaseReg.getSubReg();
5056 
5057  InsertedReg.Reg = MOInsertedReg.getReg();
5058  InsertedReg.SubReg = MOInsertedReg.getSubReg();
5059  InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
5060  return true;
5061  }
5062  llvm_unreachable("Target dependent opcode missing");
5063 }
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
bool mayLoad() const
Return true if this instruction could possibly read memory.
Definition: MCInstrDesc.h:392
unsigned getSOImmTwoPartSecond(unsigned V)
getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, return the second chunk of ...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:361
bool isCopyLike() const
Return true if the instruction behaves like a copy.
Definition: MachineInstr.h:906
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
static bool isThumb(const MCSubtargetInfo &STI)
unsigned getSpillSize(const TargetRegisterClass &RC) const
Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1)
return AArch64::GPR64RegClass contains(Reg)
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool removeKill(MachineInstr &MI)
removeKill - Delete a kill corresponding to the specified machine instruction.
Definition: LiveVariables.h:94
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
void clearKillInfo()
Clears kill flags on all operands.
static bool isCPSRDefined(const MachineInstr &MI)
Can load/store 1 register/cycle.
Definition: ARMSubtarget.h:124
static uint32_t getAlignment(const MCSectionCOFF &Sec)
bool isFpMLxInstruction(unsigned Opcode) const
isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS instruction.
A description of a memory reference used in the backend.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
ShiftOpc getAM2ShiftOpc(unsigned AM2Opc)
unsigned getSOImmValRotate(unsigned Imm)
getSOImmValRotate - Try to handle Imm with an immediate shifter operand, computing the rotate amount ...
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:209
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:314
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void setImplicit(bool Val=true)
static bool isLoad(int Opcode)
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.