LLVM  7.0.0svn
ARMBaseInstrInfo.cpp
1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the Base ARM implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMConstantPoolValue.h"
17 #include "ARMFeatures.h"
18 #include "ARMHazardRecognizer.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Triple.h"
43 #include "llvm/IR/Attributes.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DebugLoc.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/GlobalValue.h"
48 #include "llvm/MC/MCAsmInfo.h"
49 #include "llvm/MC/MCInstrDesc.h"
52 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <iterator>
63 #include <new>
64 #include <utility>
65 #include <vector>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "arm-instrinfo"
70 
71 #define GET_INSTRINFO_CTOR_DTOR
72 #include "ARMGenInstrInfo.inc"
73 
74 static cl::opt<bool>
75 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
76  cl::desc("Enable ARM 2-addr to 3-addr conv"));
77 
78 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
79 struct ARM_MLxEntry {
80  uint16_t MLxOpc; // MLA / MLS opcode
81  uint16_t MulOpc; // Expanded multiplication opcode
82  uint16_t AddSubOpc; // Expanded add / sub opcode
83  bool NegAcc; // True if the acc is negated before the add / sub.
84  bool HasLane; // True if instruction has an extra "lane" operand.
85 };
86 
87 static const ARM_MLxEntry ARM_MLxTable[] = {
88  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
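 // For example, the first row says a VMLAS multiply-accumulate can be expanded
 // into a VMULS followed by a VADDS; the VNML* rows set NegAcc because their
 // accumulator is negated before the add/sub.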
89  // fp scalar ops
90  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
91  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
92  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
93  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
94  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
95  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
96  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
97  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
98 
99  // fp SIMD ops
100  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
101  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
102  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
103  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
104  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
105  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
106  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
107  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
108 };
109 
110 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
111  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
112  Subtarget(STI) {
113  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
114  if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
115  llvm_unreachable("Duplicated entries?");
116  MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
117  MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
118  }
119 }
120 
121 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
122 // currently defaults to no prepass hazard recognizer.
123 ScheduleHazardRecognizer *
124 ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
125  const ScheduleDAG *DAG) const {
126  if (usePreRAHazardRecognizer()) {
127  const InstrItineraryData *II =
128  static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
129  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
130  }
131  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
132 }
133 
134 ScheduleHazardRecognizer *ARMBaseInstrInfo::
135 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
136  const ScheduleDAG *DAG) const {
137  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
138  return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
139  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
140 }
141 
142 MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
143  MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
144  // FIXME: Thumb2 support.
145 
146  if (!EnableARM3Addr)
147  return nullptr;
148 
149  MachineFunction &MF = *MI.getParent()->getParent();
150  uint64_t TSFlags = MI.getDesc().TSFlags;
151  bool isPre = false;
152  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
153  default: return nullptr;
154  case ARMII::IndexModePre:
155  isPre = true;
156  break;
157  case ARMII::IndexModePost:
158  break;
159  }
160 
161  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
162  // operation.
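 // For example, a pre-indexed "ldr r0, [r1, #4]!" becomes "add r1, r1, #4"
 // followed by "ldr r0, [r1]"; post-indexed forms do the memory access first
 // and update the base register afterwards.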
163  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
164  if (MemOpc == 0)
165  return nullptr;
166 
167  MachineInstr *UpdateMI = nullptr;
168  MachineInstr *MemMI = nullptr;
169  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
170  const MCInstrDesc &MCID = MI.getDesc();
171  unsigned NumOps = MCID.getNumOperands();
172  bool isLoad = !MI.mayStore();
173  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
174  const MachineOperand &Base = MI.getOperand(2);
175  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
176  unsigned WBReg = WB.getReg();
177  unsigned BaseReg = Base.getReg();
178  unsigned OffReg = Offset.getReg();
179  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
180  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
181  switch (AddrMode) {
182  default: llvm_unreachable("Unknown indexed op!");
183  case ARMII::AddrMode2: {
184  bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
185  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
186  if (OffReg == 0) {
187  if (ARM_AM::getSOImmVal(Amt) == -1)
188  // Can't encode it in a so_imm operand. This transformation will
189  // add more than 1 instruction. Abandon!
190  return nullptr;
191  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
192  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
193  .addReg(BaseReg)
194  .addImm(Amt)
195  .add(predOps(Pred))
196  .add(condCodeOp());
197  } else if (Amt != 0) {
198  ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
199  unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
200  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
201  get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
202  .addReg(BaseReg)
203  .addReg(OffReg)
204  .addReg(0)
205  .addImm(SOOpc)
206  .add(predOps(Pred))
207  .add(condCodeOp());
208  } else
209  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
210  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
211  .addReg(BaseReg)
212  .addReg(OffReg)
213  .add(predOps(Pred))
214  .add(condCodeOp());
215  break;
216  }
217  case ARMII::AddrMode3 : {
218  bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
219  unsigned Amt = ARM_AM::getAM3Offset(OffImm);
220  if (OffReg == 0)
221  // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
222  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
223  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
224  .addReg(BaseReg)
225  .addImm(Amt)
226  .add(predOps(Pred))
227  .add(condCodeOp());
228  else
229  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
230  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
231  .addReg(BaseReg)
232  .addReg(OffReg)
233  .add(predOps(Pred))
234  .add(condCodeOp());
235  break;
236  }
237  }
238 
239  std::vector<MachineInstr*> NewMIs;
240  if (isPre) {
241  if (isLoad)
242  MemMI =
243  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
244  .addReg(WBReg)
245  .addImm(0)
246  .addImm(Pred);
247  else
248  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
249  .addReg(MI.getOperand(1).getReg())
250  .addReg(WBReg)
251  .addReg(0)
252  .addImm(0)
253  .addImm(Pred);
254  NewMIs.push_back(MemMI);
255  NewMIs.push_back(UpdateMI);
256  } else {
257  if (isLoad)
258  MemMI =
259  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
260  .addReg(BaseReg)
261  .addImm(0)
262  .addImm(Pred);
263  else
264  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
265  .addReg(MI.getOperand(1).getReg())
266  .addReg(BaseReg)
267  .addReg(0)
268  .addImm(0)
269  .addImm(Pred);
270  if (WB.isDead())
271  UpdateMI->getOperand(0).setIsDead();
272  NewMIs.push_back(UpdateMI);
273  NewMIs.push_back(MemMI);
274  }
275 
276  // Transfer LiveVariables states, kill / dead info.
277  if (LV) {
278  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
279  MachineOperand &MO = MI.getOperand(i);
281  unsigned Reg = MO.getReg();
282 
283  LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284  if (MO.isDef()) {
285  MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
286  if (MO.isDead())
287  LV->addVirtualRegisterDead(Reg, *NewMI);
288  }
289  if (MO.isUse() && MO.isKill()) {
290  for (unsigned j = 0; j < 2; ++j) {
291  // Look at the two new MI's in reverse order.
292  MachineInstr *NewMI = NewMIs[j];
293  if (!NewMI->readsRegister(Reg))
294  continue;
295  LV->addVirtualRegisterKilled(Reg, *NewMI);
296  if (VI.removeKill(MI))
297  VI.Kills.push_back(NewMI);
298  break;
299  }
300  }
301  }
302  }
303  }
304 
305  MachineBasicBlock::iterator MBBI = MI;
306  MFI->insert(MBBI, NewMIs[1]);
307  MFI->insert(MBBI, NewMIs[0]);
308  return NewMIs[0];
309 }
310 
311 // Branch analysis.
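// Contract: returning false means the terminators were understood. TBB receives
// the taken destination, FBB the false/fall-through destination (null if the
// block falls through), and Cond the target-specific predicate operands (empty
// for an unconditional branch). Returning true means the block could not be
// analyzed.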
312 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
313  MachineBasicBlock *&TBB,
314  MachineBasicBlock *&FBB,
315  SmallVectorImpl<MachineOperand> &Cond,
316  bool AllowModify) const {
317  TBB = nullptr;
318  FBB = nullptr;
319 
320  MachineBasicBlock::iterator I = MBB.end();
321  if (I == MBB.begin())
322  return false; // Empty blocks are easy.
323  --I;
324 
325  // Walk backwards from the end of the basic block until the branch is
326  // analyzed or we give up.
327  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
328  // Flag to be raised on unanalyzeable instructions. This is useful in cases
329  // where we want to clean up on the end of the basic block before we bail
330  // out.
331  bool CantAnalyze = false;
332 
333  // Skip over DEBUG values and predicated nonterminators.
334  while (I->isDebugValue() || !I->isTerminator()) {
335  if (I == MBB.begin())
336  return false;
337  --I;
338  }
339 
340  if (isIndirectBranchOpcode(I->getOpcode()) ||
341  isJumpTableBranchOpcode(I->getOpcode())) {
342  // Indirect branches and jump tables can't be analyzed, but we still want
343  // to clean up any instructions at the tail of the basic block.
344  CantAnalyze = true;
345  } else if (isUncondBranchOpcode(I->getOpcode())) {
346  TBB = I->getOperand(0).getMBB();
347  } else if (isCondBranchOpcode(I->getOpcode())) {
348  // Bail out if we encounter multiple conditional branches.
349  if (!Cond.empty())
350  return true;
351 
352  assert(!FBB && "FBB should have been null.");
353  FBB = TBB;
354  TBB = I->getOperand(0).getMBB();
355  Cond.push_back(I->getOperand(1));
356  Cond.push_back(I->getOperand(2));
357  } else if (I->isReturn()) {
358  // Returns can't be analyzed, but we should run cleanup.
359  CantAnalyze = !isPredicated(*I);
360  } else {
361  // We encountered some other unrecognized terminator. Bail out immediately.
362  return true;
363  }
364 
365  // Cleanup code - to be run for unpredicated unconditional branches and
366  // returns.
367  if (!isPredicated(*I) &&
368  (isUncondBranchOpcode(I->getOpcode()) ||
369  isIndirectBranchOpcode(I->getOpcode()) ||
370  isJumpTableBranchOpcode(I->getOpcode()) ||
371  I->isReturn())) {
372  // Forget any previous conditional branch information - it no longer applies.
373  Cond.clear();
374  FBB = nullptr;
375 
376  // If we can modify the function, delete everything below this
377  // unconditional branch.
378  if (AllowModify) {
379  MachineBasicBlock::iterator DI = std::next(I);
380  while (DI != MBB.end()) {
381  MachineInstr &InstToDelete = *DI;
382  ++DI;
383  InstToDelete.eraseFromParent();
384  }
385  }
386  }
387 
388  if (CantAnalyze)
389  return true;
390 
391  if (I == MBB.begin())
392  return false;
393 
394  --I;
395  }
396 
397  // We made it past the terminators without bailing out - we must have
398  // analyzed this branch successfully.
399  return false;
400 }
401 
402 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
403  int *BytesRemoved) const {
404  assert(!BytesRemoved && "code size not handled");
405 
406  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
407  if (I == MBB.end())
408  return 0;
409 
410  if (!isUncondBranchOpcode(I->getOpcode()) &&
411  !isCondBranchOpcode(I->getOpcode()))
412  return 0;
413 
414  // Remove the branch.
415  I->eraseFromParent();
416 
417  I = MBB.end();
418 
419  if (I == MBB.begin()) return 1;
420  --I;
421  if (!isCondBranchOpcode(I->getOpcode()))
422  return 1;
423 
424  // Remove the branch.
425  I->eraseFromParent();
426  return 2;
427 }
428 
429 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
430  MachineBasicBlock *TBB,
431  MachineBasicBlock *FBB,
432  ArrayRef<MachineOperand> Cond,
433  const DebugLoc &DL,
434  int *BytesAdded) const {
435  assert(!BytesAdded && "code size not handled");
436  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
437  int BOpc = !AFI->isThumbFunction()
438  ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
439  int BccOpc = !AFI->isThumbFunction()
440  ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
441  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
442 
443  // Shouldn't be a fall through.
444  assert(TBB && "insertBranch must not be told to insert a fallthrough");
445  assert((Cond.size() == 2 || Cond.size() == 0) &&
446  "ARM branch conditions have two components!");
447 
448  // For conditional branches, we use addOperand to preserve CPSR flags.
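 // Cond[0] is the ARMCC condition-code immediate and Cond[1] is the CPSR (flags)
 // register operand, exactly as analyzeBranch pushed them above.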
449 
450  if (!FBB) {
451  if (Cond.empty()) { // Unconditional branch?
452  if (isThumb)
453  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
454  else
455  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
456  } else
457  BuildMI(&MBB, DL, get(BccOpc))
458  .addMBB(TBB)
459  .addImm(Cond[0].getImm())
460  .add(Cond[1]);
461  return 1;
462  }
463 
464  // Two-way conditional branch.
465  BuildMI(&MBB, DL, get(BccOpc))
466  .addMBB(TBB)
467  .addImm(Cond[0].getImm())
468  .add(Cond[1]);
469  if (isThumb)
470  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
471  else
472  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
473  return 2;
474 }
475 
476 bool ARMBaseInstrInfo::
477 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
478  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
479  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
480  return false;
481 }
482 
483 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
484  if (MI.isBundle()) {
485  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
486  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
487  while (++I != E && I->isInsideBundle()) {
488  int PIdx = I->findFirstPredOperandIdx();
489  if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
490  return true;
491  }
492  return false;
493  }
494 
495  int PIdx = MI.findFirstPredOperandIdx();
496  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
497 }
498 
499 bool ARMBaseInstrInfo::PredicateInstruction(
500  MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
501  unsigned Opc = MI.getOpcode();
502  if (isUncondBranchOpcode(Opc)) {
503  MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
504  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
505  .addImm(Pred[0].getImm())
506  .addReg(Pred[1].getReg());
507  return true;
508  }
509 
510  int PIdx = MI.findFirstPredOperandIdx();
511  if (PIdx != -1) {
512  MachineOperand &PMO = MI.getOperand(PIdx);
513  PMO.setImm(Pred[0].getImm());
514  MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
515  return true;
516  }
517  return false;
518 }
519 
520 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
521  ArrayRef<MachineOperand> Pred2) const {
522  if (Pred1.size() > 2 || Pred2.size() > 2)
523  return false;
524 
525  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
526  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
527  if (CC1 == CC2)
528  return true;
529 
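 // Otherwise check the one-way implications: e.g. an instruction predicated on
 // HS (unsigned >=) also executes whenever HI (unsigned >) holds, so HS
 // subsumes HI.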
530  switch (CC1) {
531  default:
532  return false;
533  case ARMCC::AL:
534  return true;
535  case ARMCC::HS:
536  return CC2 == ARMCC::HI;
537  case ARMCC::LS:
538  return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
539  case ARMCC::GE:
540  return CC2 == ARMCC::GT;
541  case ARMCC::LE:
542  return CC2 == ARMCC::LT;
543  }
544 }
545 
546 bool ARMBaseInstrInfo::DefinesPredicate(
547  MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
548  bool Found = false;
549  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
550  const MachineOperand &MO = MI.getOperand(i);
551  if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
552  (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
553  Pred.push_back(MO);
554  Found = true;
555  }
556  }
557 
558  return Found;
559 }
560 
561 bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
562  for (const auto &MO : MI.operands())
563  if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
564  return true;
565  return false;
566 }
567 
568 bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
569  unsigned Op) const {
570  const MachineOperand &Offset = MI.getOperand(Op + 1);
571  return Offset.getReg() != 0;
572 }
573 
574 // Load with negative register offset requires additional 1cyc and +I unit
575 // for Cortex A57
576 bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
577  unsigned Op) const {
578  const MachineOperand &Offset = MI.getOperand(Op + 1);
579  const MachineOperand &Opc = MI.getOperand(Op + 2);
580  assert(Opc.isImm());
581  assert(Offset.isReg());
582  int64_t OpcImm = Opc.getImm();
583 
584  bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
585  return (isSub && Offset.getReg() != 0);
586 }
587 
588 bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
589  unsigned Op) const {
590  const MachineOperand &Opc = MI.getOperand(Op + 2);
591  unsigned OffImm = Opc.getImm();
592  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
593 }
594 
595 // Load, scaled register offset, not plus LSL2
596 bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
597  unsigned Op) const {
598  const MachineOperand &Opc = MI.getOperand(Op + 2);
599  unsigned OffImm = Opc.getImm();
600 
601  bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
602  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
603  ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
604  if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
605  bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
606  return !SimpleScaled;
607 }
608 
609 // Minus reg for ldstso addr mode
610 bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
611  unsigned Op) const {
612  unsigned OffImm = MI.getOperand(Op + 2).getImm();
613  return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
614 }
615 
616 // Load, scaled register offset
617 bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
618  unsigned Op) const {
619  unsigned OffImm = MI.getOperand(Op + 2).getImm();
620  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
621 }
622 
623 static bool isEligibleForITBlock(const MachineInstr *MI) {
624  switch (MI->getOpcode()) {
625  default: return true;
626  case ARM::tADC: // ADC (register) T1
627  case ARM::tADDi3: // ADD (immediate) T1
628  case ARM::tADDi8: // ADD (immediate) T2
629  case ARM::tADDrr: // ADD (register) T1
630  case ARM::tAND: // AND (register) T1
631  case ARM::tASRri: // ASR (immediate) T1
632  case ARM::tASRrr: // ASR (register) T1
633  case ARM::tBIC: // BIC (register) T1
634  case ARM::tEOR: // EOR (register) T1
635  case ARM::tLSLri: // LSL (immediate) T1
636  case ARM::tLSLrr: // LSL (register) T1
637  case ARM::tLSRri: // LSR (immediate) T1
638  case ARM::tLSRrr: // LSR (register) T1
639  case ARM::tMUL: // MUL T1
640  case ARM::tMVN: // MVN (register) T1
641  case ARM::tORR: // ORR (register) T1
642  case ARM::tROR: // ROR (register) T1
643  case ARM::tRSB: // RSB (immediate) T1
644  case ARM::tSBC: // SBC (register) T1
645  case ARM::tSUBi3: // SUB (immediate) T1
646  case ARM::tSUBi8: // SUB (immediate) T2
647  case ARM::tSUBrr: // SUB (register) T1
648  return !ARMBaseInstrInfo::isCPSRDefined(*MI);
649  }
650 }
651 
652 /// isPredicable - Return true if the specified instruction can be predicated.
653 /// By default, this returns true for every instruction with a
654 /// PredicateOperand.
655 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
656  if (!MI.isPredicable())
657  return false;
658 
659  if (MI.isBundle())
660  return false;
661 
662  if (!isEligibleForITBlock(&MI))
663  return false;
664 
665  const ARMFunctionInfo *AFI =
666  MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
667 
668  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
669  // In their ARM encoding, they can't be encoded in a conditional form.
670  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
671  return false;
672 
673  if (AFI->isThumb2Function()) {
674  if (getSubtarget().restrictIT())
675  return isV8EligibleForIT(&MI);
676  }
677 
678  return true;
679 }
680 
681 namespace llvm {
682 
683 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
684  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
685  const MachineOperand &MO = MI->getOperand(i);
686  if (!MO.isReg() || MO.isUndef() || MO.isUse())
687  continue;
688  if (MO.getReg() != ARM::CPSR)
689  continue;
690  if (!MO.isDead())
691  return false;
692  }
693  // all definitions of CPSR are dead
694  return true;
695 }
696 
697 } // end namespace llvm
698 
699 /// GetInstSize - Return the size of the specified MachineInstr.
700 ///
701 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
702  const MachineBasicBlock &MBB = *MI.getParent();
703  const MachineFunction *MF = MBB.getParent();
704  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
705 
706  const MCInstrDesc &MCID = MI.getDesc();
707  if (MCID.getSize())
708  return MCID.getSize();
709 
710  // If this machine instr is an inline asm, measure it.
711  if (MI.getOpcode() == ARM::INLINEASM)
712  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
713  unsigned Opc = MI.getOpcode();
714  switch (Opc) {
715  default:
716  // pseudo-instruction sizes are zero.
717  return 0;
718  case TargetOpcode::BUNDLE:
719  return getInstBundleLength(MI);
720  case ARM::MOVi16_ga_pcrel:
721  case ARM::MOVTi16_ga_pcrel:
722  case ARM::t2MOVi16_ga_pcrel:
723  case ARM::t2MOVTi16_ga_pcrel:
724  return 4;
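 // MOVi32imm / t2MOVi32imm are normally expanded into a MOVW/MOVT pair, hence
 // the 8-byte estimate.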
725  case ARM::MOVi32imm:
726  case ARM::t2MOVi32imm:
727  return 8;
728  case ARM::CONSTPOOL_ENTRY:
729  case ARM::JUMPTABLE_INSTS:
730  case ARM::JUMPTABLE_ADDRS:
731  case ARM::JUMPTABLE_TBB:
732  case ARM::JUMPTABLE_TBH:
733  // If this machine instr is a constant pool entry, its size is recorded as
734  // operand #2.
735  return MI.getOperand(2).getImm();
736  case ARM::Int_eh_sjlj_longjmp:
737  return 16;
738  case ARM::tInt_eh_sjlj_longjmp:
739  return 10;
740  case ARM::tInt_WIN_eh_sjlj_longjmp:
741  return 12;
742  case ARM::Int_eh_sjlj_setjmp:
743  case ARM::Int_eh_sjlj_setjmp_nofp:
744  return 20;
745  case ARM::tInt_eh_sjlj_setjmp:
746  case ARM::t2Int_eh_sjlj_setjmp:
747  case ARM::t2Int_eh_sjlj_setjmp_nofp:
748  return 12;
749  case ARM::SPACE:
750  return MI.getOperand(1).getImm();
751  }
752 }
753 
754 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
755  unsigned Size = 0;
756  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
757  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
758  while (++I != E && I->isInsideBundle()) {
759  assert(!I->isBundle() && "No nested bundle!");
760  Size += getInstSizeInBytes(*I);
761  }
762  return Size;
763 }
764 
765 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
766  MachineBasicBlock::iterator I,
767  unsigned DestReg, bool KillSrc,
768  const ARMSubtarget &Subtarget) const {
769  unsigned Opc = Subtarget.isThumb()
770  ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
771  : ARM::MRS;
772 
773  MachineInstrBuilder MIB =
774  BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
775 
776  // There is only 1 A/R class MRS instruction, and it always refers to
777  // APSR. However, there are lots of other possibilities on M-class cores.
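 // (The 0x800 immediate selects APSR_nzcvq in the M-class special-register
 // encoding.)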
778  if (Subtarget.isMClass())
779  MIB.addImm(0x800);
780 
781  MIB.add(predOps(ARMCC::AL))
782  .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
783 }
784 
785 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
786  MachineBasicBlock::iterator I,
787  unsigned SrcReg, bool KillSrc,
788  const ARMSubtarget &Subtarget) const {
789  unsigned Opc = Subtarget.isThumb()
790  ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
791  : ARM::MSR;
792 
793  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
794 
795  if (Subtarget.isMClass())
796  MIB.addImm(0x800);
797  else
798  MIB.addImm(8);
799 
800  MIB.addReg(SrcReg, getKillRegState(KillSrc))
801  .add(predOps(ARMCC::AL))
802  .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
803 }
804 
805 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
806  MachineBasicBlock::iterator I,
807  const DebugLoc &DL, unsigned DestReg,
808  unsigned SrcReg, bool KillSrc) const {
809  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
810  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
811 
812  if (GPRDest && GPRSrc) {
813  BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
814  .addReg(SrcReg, getKillRegState(KillSrc))
815  .add(predOps(ARMCC::AL))
816  .add(condCodeOp());
817  return;
818  }
819 
820  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
821  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
822 
823  unsigned Opc = 0;
824  if (SPRDest && SPRSrc)
825  Opc = ARM::VMOVS;
826  else if (GPRDest && SPRSrc)
827  Opc = ARM::VMOVRS;
828  else if (SPRDest && GPRSrc)
829  Opc = ARM::VMOVSR;
830  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
831  Opc = ARM::VMOVD;
832  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
833  Opc = ARM::VORRq;
834 
835  if (Opc) {
836  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
837  MIB.addReg(SrcReg, getKillRegState(KillSrc));
838  if (Opc == ARM::VORRq)
839  MIB.addReg(SrcReg, getKillRegState(KillSrc));
840  MIB.add(predOps(ARMCC::AL));
841  return;
842  }
843 
844  // Handle register classes that require multiple instructions.
845  unsigned BeginIdx = 0;
846  unsigned SubRegs = 0;
847  int Spacing = 1;
848 
849  // Use VORRq when possible.
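 // NEON has no dedicated Q-register move; "VORR Qd, Qm, Qm" serves as the copy.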
850  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
851  Opc = ARM::VORRq;
852  BeginIdx = ARM::qsub_0;
853  SubRegs = 2;
854  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
855  Opc = ARM::VORRq;
856  BeginIdx = ARM::qsub_0;
857  SubRegs = 4;
858  // Fall back to VMOVD.
859  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
860  Opc = ARM::VMOVD;
861  BeginIdx = ARM::dsub_0;
862  SubRegs = 2;
863  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
864  Opc = ARM::VMOVD;
865  BeginIdx = ARM::dsub_0;
866  SubRegs = 3;
867  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
868  Opc = ARM::VMOVD;
869  BeginIdx = ARM::dsub_0;
870  SubRegs = 4;
871  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
872  Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
873  BeginIdx = ARM::gsub_0;
874  SubRegs = 2;
875  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
876  Opc = ARM::VMOVD;
877  BeginIdx = ARM::dsub_0;
878  SubRegs = 2;
879  Spacing = 2;
880  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
881  Opc = ARM::VMOVD;
882  BeginIdx = ARM::dsub_0;
883  SubRegs = 3;
884  Spacing = 2;
885  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
886  Opc = ARM::VMOVD;
887  BeginIdx = ARM::dsub_0;
888  SubRegs = 4;
889  Spacing = 2;
890  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
891  Opc = ARM::VMOVS;
892  BeginIdx = ARM::ssub_0;
893  SubRegs = 2;
894  } else if (SrcReg == ARM::CPSR) {
895  copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
896  return;
897  } else if (DestReg == ARM::CPSR) {
898  copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
899  return;
900  }
901 
902  assert(Opc && "Impossible reg-to-reg copy");
903 
904  const TargetRegisterInfo *TRI = &getRegisterInfo();
905  MachineInstrBuilder Mov;
906 
907  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
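 // For example, copying Q0_Q1 into Q1_Q2 in ascending order would clobber Q1
 // before it is read as a source, so iterate from the highest sub-register down.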
908  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
909  BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
910  Spacing = -Spacing;
911  }
912 #ifndef NDEBUG
913  SmallSet<unsigned, 4> DstRegs;
914 #endif
915  for (unsigned i = 0; i != SubRegs; ++i) {
916  unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
917  unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
918  assert(Dst && Src && "Bad sub-register");
919 #ifndef NDEBUG
920  assert(!DstRegs.count(Src) && "destructive vector copy");
921  DstRegs.insert(Dst);
922 #endif
923  Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
924  // VORR takes two source operands.
925  if (Opc == ARM::VORRq)
926  Mov.addReg(Src);
927  Mov = Mov.add(predOps(ARMCC::AL));
928  // MOVr can set CC.
929  if (Opc == ARM::MOVr)
930  Mov = Mov.add(condCodeOp());
931  }
932  // Add implicit super-register defs and kills to the last instruction.
933  Mov->addRegisterDefined(DestReg, TRI);
934  if (KillSrc)
935  Mov->addRegisterKilled(SrcReg, TRI);
936 }
937 
938 const MachineInstrBuilder &
939 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
940  unsigned SubIdx, unsigned State,
941  const TargetRegisterInfo *TRI) const {
942  if (!SubIdx)
943  return MIB.addReg(Reg, State);
944 
945  if (TargetRegisterInfo::isPhysicalRegister(Reg))
946  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
947  return MIB.addReg(Reg, State, SubIdx);
948 }
949 
950 void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
951  MachineBasicBlock::iterator I,
952  unsigned SrcReg, bool isKill, int FI,
953  const TargetRegisterClass *RC,
954  const TargetRegisterInfo *TRI) const {
955  DebugLoc DL;
956  if (I != MBB.end()) DL = I->getDebugLoc();
957  MachineFunction &MF = *MBB.getParent();
958  MachineFrameInfo &MFI = MF.getFrameInfo();
959  unsigned Align = MFI.getObjectAlignment(FI);
960 
960  MachineMemOperand *MMO = MF.getMachineMemOperand(
961  MachinePointerInfo::getFixedStack(MF, FI),
962  MachineMemOperand::MOStore,
963  MFI.getObjectSize(FI), Align);
964 
965  switch (TRI->getSpillSize(*RC)) {
966  case 2:
967  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
968  BuildMI(MBB, I, DL, get(ARM::VSTRH))
969  .addReg(SrcReg, getKillRegState(isKill))
970  .addFrameIndex(FI)
971  .addImm(0)
972  .addMemOperand(MMO)
973  .add(predOps(ARMCC::AL));
974  } else
975  llvm_unreachable("Unknown reg class!");
976  break;
977  case 4:
978  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
979  BuildMI(MBB, I, DL, get(ARM::STRi12))
980  .addReg(SrcReg, getKillRegState(isKill))
981  .addFrameIndex(FI)
982  .addImm(0)
983  .addMemOperand(MMO)
984  .add(predOps(ARMCC::AL));
985  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
986  BuildMI(MBB, I, DL, get(ARM::VSTRS))
987  .addReg(SrcReg, getKillRegState(isKill))
988  .addFrameIndex(FI)
989  .addImm(0)
990  .addMemOperand(MMO)
991  .add(predOps(ARMCC::AL));
992  } else
993  llvm_unreachable("Unknown reg class!");
994  break;
995  case 8:
996  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
997  BuildMI(MBB, I, DL, get(ARM::VSTRD))
998  .addReg(SrcReg, getKillRegState(isKill))
999  .addFrameIndex(FI)
1000  .addImm(0)
1001  .addMemOperand(MMO)
1002  .add(predOps(ARMCC::AL));
1003  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1004  if (Subtarget.hasV5TEOps()) {
1005  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
1006  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1007  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1008  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1009  .add(predOps(ARMCC::AL));
1010  } else {
1011  // Fallback to STM instruction, which has existed since the dawn of
1012  // time.
1013  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
1014  .addFrameIndex(FI)
1015  .addMemOperand(MMO)
1016  .add(predOps(ARMCC::AL));
1017  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1018  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1019  }
1020  } else
1021  llvm_unreachable("Unknown reg class!");
1022  break;
1023  case 16:
1024  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1025  // Use aligned spills if the stack can be realigned.
1026  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1027  BuildMI(MBB, I, DL, get(ARM::VST1q64))
1028  .addFrameIndex(FI)
1029  .addImm(16)
1030  .addReg(SrcReg, getKillRegState(isKill))
1031  .addMemOperand(MMO)
1032  .add(predOps(ARMCC::AL));
1033  } else {
1034  BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
1035  .addReg(SrcReg, getKillRegState(isKill))
1036  .addFrameIndex(FI)
1037  .addMemOperand(MMO)
1038  .add(predOps(ARMCC::AL));
1039  }
1040  } else
1041  llvm_unreachable("Unknown reg class!");
1042  break;
1043  case 24:
1044  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1045  // Use aligned spills if the stack can be realigned.
1046  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1047  BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
1048  .addFrameIndex(FI)
1049  .addImm(16)
1050  .addReg(SrcReg, getKillRegState(isKill))
1051  .addMemOperand(MMO)
1052  .add(predOps(ARMCC::AL));
1053  } else {
1054  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1055  .addFrameIndex(FI)
1056  .add(predOps(ARMCC::AL))
1057  .addMemOperand(MMO);
1058  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1059  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1060  AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1061  }
1062  } else
1063  llvm_unreachable("Unknown reg class!");
1064  break;
1065  case 32:
1066  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1067  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1068  // FIXME: It's possible to only store part of the QQ register if the
1069  // spilled def has a sub-register index.
1070  BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
1071  .addFrameIndex(FI)
1072  .addImm(16)
1073  .addReg(SrcReg, getKillRegState(isKill))
1074  .addMemOperand(MMO)
1075  .add(predOps(ARMCC::AL));
1076  } else {
1077  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1078  .addFrameIndex(FI)
1079  .add(predOps(ARMCC::AL))
1080  .addMemOperand(MMO);
1081  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1082  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1083  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1084  AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1085  }
1086  } else
1087  llvm_unreachable("Unknown reg class!");
1088  break;
1089  case 64:
1090  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1091  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1092  .addFrameIndex(FI)
1093  .add(predOps(ARMCC::AL))
1094  .addMemOperand(MMO);
1095  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1096  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1097  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1098  MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1099  MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1100  MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1101  MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1102  AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1103  } else
1104  llvm_unreachable("Unknown reg class!");
1105  break;
1106  default:
1107  llvm_unreachable("Unknown reg class!");
1108  }
1109 }
1110 
1111 unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1112  int &FrameIndex) const {
1113  switch (MI.getOpcode()) {
1114  default: break;
1115  case ARM::STRrs:
1116  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1117  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1118  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1119  MI.getOperand(3).getImm() == 0) {
1120  FrameIndex = MI.getOperand(1).getIndex();
1121  return MI.getOperand(0).getReg();
1122  }
1123  break;
1124  case ARM::STRi12:
1125  case ARM::t2STRi12:
1126  case ARM::tSTRspi:
1127  case ARM::VSTRD:
1128  case ARM::VSTRS:
1129  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1130  MI.getOperand(2).getImm() == 0) {
1131  FrameIndex = MI.getOperand(1).getIndex();
1132  return MI.getOperand(0).getReg();
1133  }
1134  break;
1135  case ARM::VST1q64:
1136  case ARM::VST1d64TPseudo:
1137  case ARM::VST1d64QPseudo:
1138  if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1139  FrameIndex = MI.getOperand(0).getIndex();
1140  return MI.getOperand(2).getReg();
1141  }
1142  break;
1143  case ARM::VSTMQIA:
1144  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1145  FrameIndex = MI.getOperand(1).getIndex();
1146  return MI.getOperand(0).getReg();
1147  }
1148  break;
1149  }
1150 
1151  return 0;
1152 }
1153 
1154 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1155  int &FrameIndex) const {
1156  const MachineMemOperand *Dummy;
1157  return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
1158 }
1159 
1160 void ARMBaseInstrInfo::
1161 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1162  unsigned DestReg, int FI,
1163  const TargetRegisterClass *RC,
1164  const TargetRegisterInfo *TRI) const {
1165  DebugLoc DL;
1166  if (I != MBB.end()) DL = I->getDebugLoc();
1167  MachineFunction &MF = *MBB.getParent();
1168  MachineFrameInfo &MFI = MF.getFrameInfo();
1169  unsigned Align = MFI.getObjectAlignment(FI);
1170  MachineMemOperand *MMO = MF.getMachineMemOperand(
1171  MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1172  MFI.getObjectSize(FI), Align);
1173 
1174  switch (TRI->getSpillSize(*RC)) {
1175  case 2:
1176  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1177  BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1178  .addFrameIndex(FI)
1179  .addImm(0)
1180  .addMemOperand(MMO)
1181  .add(predOps(ARMCC::AL));
1182  } else
1183  llvm_unreachable("Unknown reg class!");
1184  break;
1185  case 4:
1186  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1187  BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1188  .addFrameIndex(FI)
1189  .addImm(0)
1190  .addMemOperand(MMO)
1191  .add(predOps(ARMCC::AL));
1192  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1193  BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1194  .addFrameIndex(FI)
1195  .addImm(0)
1196  .addMemOperand(MMO)
1197  .add(predOps(ARMCC::AL));
1198  } else
1199  llvm_unreachable("Unknown reg class!");
1200  break;
1201  case 8:
1202  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1203  BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1204  .addFrameIndex(FI)
1205  .addImm(0)
1206  .addMemOperand(MMO)
1207  .add(predOps(ARMCC::AL));
1208  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1209  MachineInstrBuilder MIB;
1210 
1211  if (Subtarget.hasV5TEOps()) {
1212  MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1213  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1214  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1215  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1216  .add(predOps(ARMCC::AL));
1217  } else {
1218  // Fallback to LDM instruction, which has existed since the dawn of
1219  // time.
1220  MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1221  .addFrameIndex(FI)
1222  .addMemOperand(MMO)
1223  .add(predOps(ARMCC::AL));
1224  MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1225  MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1226  }
1227 
1229  MIB.addReg(DestReg, RegState::ImplicitDefine);
1230  } else
1231  llvm_unreachable("Unknown reg class!");
1232  break;
1233  case 16:
1234  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1235  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1236  BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1237  .addFrameIndex(FI)
1238  .addImm(16)
1239  .addMemOperand(MMO)
1240  .add(predOps(ARMCC::AL));
1241  } else {
1242  BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1243  .addFrameIndex(FI)
1244  .addMemOperand(MMO)
1245  .add(predOps(ARMCC::AL));
1246  }
1247  } else
1248  llvm_unreachable("Unknown reg class!");
1249  break;
1250  case 24:
1251  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1252  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1253  BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1254  .addFrameIndex(FI)
1255  .addImm(16)
1256  .addMemOperand(MMO)
1257  .add(predOps(ARMCC::AL));
1258  } else {
1259  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1260  .addFrameIndex(FI)
1261  .addMemOperand(MMO)
1262  .add(predOps(ARMCC::AL));
1263  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1264  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1265  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1267  MIB.addReg(DestReg, RegState::ImplicitDefine);
1268  }
1269  } else
1270  llvm_unreachable("Unknown reg class!");
1271  break;
1272  case 32:
1273  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1274  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1275  BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1276  .addFrameIndex(FI)
1277  .addImm(16)
1278  .addMemOperand(MMO)
1279  .add(predOps(ARMCC::AL));
1280  } else {
1281  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1282  .addFrameIndex(FI)
1283  .add(predOps(ARMCC::AL))
1284  .addMemOperand(MMO);
1285  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1286  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1287  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1288  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1290  MIB.addReg(DestReg, RegState::ImplicitDefine);
1291  }
1292  } else
1293  llvm_unreachable("Unknown reg class!");
1294  break;
1295  case 64:
1296  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1297  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1298  .addFrameIndex(FI)
1299  .add(predOps(ARMCC::AL))
1300  .addMemOperand(MMO);
1301  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1302  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1303  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1304  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1305  MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1306  MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1307  MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1308  MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1310  MIB.addReg(DestReg, RegState::ImplicitDefine);
1311  } else
1312  llvm_unreachable("Unknown reg class!");
1313  break;
1314  default:
1315  llvm_unreachable("Unknown regclass!");
1316  }
1317 }
1318 
1319 unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1320  int &FrameIndex) const {
1321  switch (MI.getOpcode()) {
1322  default: break;
1323  case ARM::LDRrs:
1324  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1325  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1326  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1327  MI.getOperand(3).getImm() == 0) {
1328  FrameIndex = MI.getOperand(1).getIndex();
1329  return MI.getOperand(0).getReg();
1330  }
1331  break;
1332  case ARM::LDRi12:
1333  case ARM::t2LDRi12:
1334  case ARM::tLDRspi:
1335  case ARM::VLDRD:
1336  case ARM::VLDRS:
1337  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1338  MI.getOperand(2).getImm() == 0) {
1339  FrameIndex = MI.getOperand(1).getIndex();
1340  return MI.getOperand(0).getReg();
1341  }
1342  break;
1343  case ARM::VLD1q64:
1344  case ARM::VLD1d64TPseudo:
1345  case ARM::VLD1d64QPseudo:
1346  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1347  FrameIndex = MI.getOperand(1).getIndex();
1348  return MI.getOperand(0).getReg();
1349  }
1350  break;
1351  case ARM::VLDMQIA:
1352  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1353  FrameIndex = MI.getOperand(1).getIndex();
1354  return MI.getOperand(0).getReg();
1355  }
1356  break;
1357  }
1358 
1359  return 0;
1360 }
1361 
1362 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1363  int &FrameIndex) const {
1364  const MachineMemOperand *Dummy;
1365  return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
1366 }
1367 
1368 /// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1369 /// depending on whether the result is used.
1370 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1371  bool isThumb1 = Subtarget.isThumb1Only();
1372  bool isThumb2 = Subtarget.isThumb2();
1373  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1374 
1375  DebugLoc dl = MI->getDebugLoc();
1376  MachineBasicBlock *BB = MI->getParent();
1377 
1378  MachineInstrBuilder LDM, STM;
1379  if (isThumb1 || !MI->getOperand(1).isDead()) {
1380  MachineOperand LDWb(MI->getOperand(1));
1381  LDWb.setIsRenamable(false);
1382  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1383  : isThumb1 ? ARM::tLDMIA_UPD
1384  : ARM::LDMIA_UPD))
1385  .add(LDWb);
1386  } else {
1387  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1388  }
1389 
1390  if (isThumb1 || !MI->getOperand(0).isDead()) {
1391  MachineOperand STWb(MI->getOperand(0));
1392  STWb.setIsRenamable(false);
1393  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1394  : isThumb1 ? ARM::tSTMIA_UPD
1395  : ARM::STMIA_UPD))
1396  .add(STWb);
1397  } else {
1398  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1399  }
1400 
1401  MachineOperand LDBase(MI->getOperand(3));
1402  LDBase.setIsRenamable(false);
1403  LDM.add(LDBase).add(predOps(ARMCC::AL));
1404 
1405  MachineOperand STBase(MI->getOperand(2));
1406  STBase.setIsRenamable(false);
1407  STM.add(STBase).add(predOps(ARMCC::AL));
1408 
1409  // Sort the scratch registers into ascending order.
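 // LDM/STM transfer their register list in ascending register order, so the
 // scratch registers must be sorted for the loads and stores to pair up.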
1410  const TargetRegisterInfo &TRI = getRegisterInfo();
1411  SmallVector<unsigned, 6> ScratchRegs;
1412  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1413  ScratchRegs.push_back(MI->getOperand(I).getReg());
1414  std::sort(ScratchRegs.begin(), ScratchRegs.end(),
1415  [&TRI](const unsigned &Reg1,
1416  const unsigned &Reg2) -> bool {
1417  return TRI.getEncodingValue(Reg1) <
1418  TRI.getEncodingValue(Reg2);
1419  });
1420 
1421  for (const auto &Reg : ScratchRegs) {
1422  LDM.addReg(Reg, RegState::Define);
1423  STM.addReg(Reg, RegState::Kill);
1424  }
1425 
1426  BB->erase(MI);
1427 }
1428 
1429 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1430  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1431  assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
1432  "LOAD_STACK_GUARD currently supported only for MachO.");
1433  expandLoadStackGuard(MI);
1434  MI.getParent()->erase(MI);
1435  return true;
1436  }
1437 
1438  if (MI.getOpcode() == ARM::MEMCPY) {
1439  expandMEMCPY(MI);
1440  return true;
1441  }
1442 
1443  // This hook gets to expand COPY instructions before they become
1444  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1445  // widened to VMOVD. We prefer the VMOVD when possible because it may be
1446  // changed into a VORR that can go down the NEON pipeline.
1447  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
1448  return false;
1449 
1450  // Look for a copy between even S-registers. That is where we keep floats
1451  // when using NEON v2f32 instructions for f32 arithmetic.
1452  unsigned DstRegS = MI.getOperand(0).getReg();
1453  unsigned SrcRegS = MI.getOperand(1).getReg();
1454  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1455  return false;
1456 
1457  const TargetRegisterInfo *TRI = &getRegisterInfo();
1458  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1459  &ARM::DPRRegClass);
1460  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1461  &ARM::DPRRegClass);
1462  if (!DstRegD || !SrcRegD)
1463  return false;
1464 
1465  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1466  // legal if the COPY already defines the full DstRegD, and it isn't a
1467  // sub-register insertion.
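 // That is, a COPY that merely inserts into one S lane of DstRegD (and therefore
 // reads the other lane) is left alone; widening it to VMOVD would clobber that
 // other lane.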
1468  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1469  return false;
1470 
1471  // A dead copy shouldn't show up here, but reject it just in case.
1472  if (MI.getOperand(0).isDead())
1473  return false;
1474 
1475  // All clear, widen the COPY.
1476  DEBUG(dbgs() << "widening: " << MI);
1477  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1478 
1479  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1480  // or some other super-register.
1481  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1482  if (ImpDefIdx != -1)
1483  MI.RemoveOperand(ImpDefIdx);
1484 
1485  // Change the opcode and operands.
1486  MI.setDesc(get(ARM::VMOVD));
1487  MI.getOperand(0).setReg(DstRegD);
1488  MI.getOperand(1).setReg(SrcRegD);
1489  MIB.add(predOps(ARMCC::AL));
1490 
1491  // We are now reading SrcRegD instead of SrcRegS. This may upset the
1492  // register scavenger and machine verifier, so we need to indicate that we
1493  // are reading an undefined value from SrcRegD, but a proper value from
1494  // SrcRegS.
1495  MI.getOperand(1).setIsUndef();
1496  MIB.addReg(SrcRegS, RegState::Implicit);
1497 
1498  // SrcRegD may actually contain an unrelated value in the ssub_1
1499  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1500  if (MI.getOperand(1).isKill()) {
1501  MI.getOperand(1).setIsKill(false);
1502  MI.addRegisterKilled(SrcRegS, TRI, true);
1503  }
1504 
1505  DEBUG(dbgs() << "replaced by: " << MI);
1506  return true;
1507 }
1508 
1509 /// Create a copy of a const pool value. Update CPI to the new index and return
1510 /// the label UID.
1511 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1512  MachineConstantPool *MCP = MF.getConstantPool();
1513  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1514 
1515  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1516  assert(MCPE.isMachineConstantPoolEntry() &&
1517  "Expecting a machine constantpool entry!");
1518  ARMConstantPoolValue *ACPV =
1519  static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1520 
1521  unsigned PCLabelId = AFI->createPICLabelUId();
1522  ARMConstantPoolValue *NewCPV = nullptr;
1523 
1524  // FIXME: The below assumes PIC relocation model and that the function
1525  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1526  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1527  // instructions, so that's probably OK, but is PIC always correct when
1528  // we get here?
1529  if (ACPV->isGlobalValue())
1530  NewCPV = ARMConstantPoolConstant::Create(
1531  cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1532  4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1533  else if (ACPV->isExtSymbol())
1534  NewCPV = ARMConstantPoolSymbol::
1535  Create(MF.getFunction().getContext(),
1536  cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1537  else if (ACPV->isBlockAddress())
1538  NewCPV = ARMConstantPoolConstant::
1539  Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1540  ARMCP::CPBlockAddress, 4);
1541  else if (ACPV->isLSDA())
1542  NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1543  ARMCP::CPLSDA, 4);
1544  else if (ACPV->isMachineBasicBlock())
1545  NewCPV = ARMConstantPoolMBB::
1546  Create(MF.getFunction().getContext(),
1547  cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1548  else
1549  llvm_unreachable("Unexpected ARM constantpool value type!!");
1550  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
1551  return PCLabelId;
1552 }
1553 
1554 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1555  MachineBasicBlock::iterator I,
1556  unsigned DestReg, unsigned SubIdx,
1557  const MachineInstr &Orig,
1558  const TargetRegisterInfo &TRI) const {
1559  unsigned Opcode = Orig.getOpcode();
1560  switch (Opcode) {
1561  default: {
1562  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1563  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1564  MBB.insert(I, MI);
1565  break;
1566  }
1567  case ARM::tLDRpci_pic:
1568  case ARM::t2LDRpci_pic: {
1569  MachineFunction &MF = *MBB.getParent();
1570  unsigned CPI = Orig.getOperand(1).getIndex();
1571  unsigned PCLabelId = duplicateCPV(MF, CPI);
1572  MachineInstrBuilder MIB =
1573  BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1574  .addConstantPoolIndex(CPI)
1575  .addImm(PCLabelId);
1576  MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
1577  break;
1578  }
1579  }
1580 }
1581 
1582 MachineInstr &
1583 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1584  MachineBasicBlock::iterator InsertBefore,
1585  const MachineInstr &Orig) const {
1586  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1587  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1588  for (;;) {
1589  switch (I->getOpcode()) {
1590  case ARM::tLDRpci_pic:
1591  case ARM::t2LDRpci_pic: {
1592  MachineFunction &MF = *MBB.getParent();
1593  unsigned CPI = I->getOperand(1).getIndex();
1594  unsigned PCLabelId = duplicateCPV(MF, CPI);
1595  I->getOperand(1).setIndex(CPI);
1596  I->getOperand(2).setImm(PCLabelId);
1597  break;
1598  }
1599  }
1600  if (!I->isBundledWithSucc())
1601  break;
1602  ++I;
1603  }
1604  return Cloned;
1605 }
1606 
1607 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1608  const MachineInstr &MI1,
1609  const MachineRegisterInfo *MRI) const {
1610  unsigned Opcode = MI0.getOpcode();
1611  if (Opcode == ARM::t2LDRpci ||
1612  Opcode == ARM::t2LDRpci_pic ||
1613  Opcode == ARM::tLDRpci ||
1614  Opcode == ARM::tLDRpci_pic ||
1615  Opcode == ARM::LDRLIT_ga_pcrel ||
1616  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1617  Opcode == ARM::tLDRLIT_ga_pcrel ||
1618  Opcode == ARM::MOV_ga_pcrel ||
1619  Opcode == ARM::MOV_ga_pcrel_ldr ||
1620  Opcode == ARM::t2MOV_ga_pcrel) {
1621  if (MI1.getOpcode() != Opcode)
1622  return false;
1623  if (MI0.getNumOperands() != MI1.getNumOperands())
1624  return false;
1625 
1626  const MachineOperand &MO0 = MI0.getOperand(1);
1627  const MachineOperand &MO1 = MI1.getOperand(1);
1628  if (MO0.getOffset() != MO1.getOffset())
1629  return false;
1630 
1631  if (Opcode == ARM::LDRLIT_ga_pcrel ||
1632  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1633  Opcode == ARM::tLDRLIT_ga_pcrel ||
1634  Opcode == ARM::MOV_ga_pcrel ||
1635  Opcode == ARM::MOV_ga_pcrel_ldr ||
1636  Opcode == ARM::t2MOV_ga_pcrel)
1637  // Ignore the PC labels.
1638  return MO0.getGlobal() == MO1.getGlobal();
1639 
1640  const MachineFunction *MF = MI0.getParent()->getParent();
1641  const MachineConstantPool *MCP = MF->getConstantPool();
1642  int CPI0 = MO0.getIndex();
1643  int CPI1 = MO1.getIndex();
1644  const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1645  const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1646  bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1647  bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1648  if (isARMCP0 && isARMCP1) {
1649  ARMConstantPoolValue *ACPV0 =
1650  static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1651  ARMConstantPoolValue *ACPV1 =
1652  static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1653  return ACPV0->hasSameValue(ACPV1);
1654  } else if (!isARMCP0 && !isARMCP1) {
1655  return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1656  }
1657  return false;
1658  } else if (Opcode == ARM::PICLDR) {
1659  if (MI1.getOpcode() != Opcode)
1660  return false;
1661  if (MI0.getNumOperands() != MI1.getNumOperands())
1662  return false;
1663 
1664  unsigned Addr0 = MI0.getOperand(1).getReg();
1665  unsigned Addr1 = MI1.getOperand(1).getReg();
1666  if (Addr0 != Addr1) {
1667  if (!MRI ||
1668  !TargetRegisterInfo::isVirtualRegister(Addr0) ||
1669  !TargetRegisterInfo::isVirtualRegister(Addr1))
1670  return false;
1671 
1672  // This assumes SSA form.
1673  MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1674  MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1675  // Check if the loaded value, e.g. a constantpool of a global address, are
1676  // the same.
1677  if (!produceSameValue(*Def0, *Def1, MRI))
1678  return false;
1679  }
1680 
1681  for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1682  // %12 = PICLDR %11, 0, 14, %noreg
1683  const MachineOperand &MO0 = MI0.getOperand(i);
1684  const MachineOperand &MO1 = MI1.getOperand(i);
1685  if (!MO0.isIdenticalTo(MO1))
1686  return false;
1687  }
1688  return true;
1689  }
1690 
1691  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1692 }
1693 
1694 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1695 /// determine if two loads are loading from the same base address. It should
1696 /// only return true if the base pointers are the same and the only differences
1697 /// between the two addresses is the offset. It also returns the offsets by
1698 /// reference.
1699 ///
1700 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1701 /// is permanently disabled.
1702 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1703  int64_t &Offset1,
1704  int64_t &Offset2) const {
1705  // Don't worry about Thumb: just ARM and Thumb2.
1706  if (Subtarget.isThumb1Only()) return false;
1707 
1708  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1709  return false;
1710 
1711  switch (Load1->getMachineOpcode()) {
1712  default:
1713  return false;
1714  case ARM::LDRi12:
1715  case ARM::LDRBi12:
1716  case ARM::LDRD:
1717  case ARM::LDRH:
1718  case ARM::LDRSB:
1719  case ARM::LDRSH:
1720  case ARM::VLDRD:
1721  case ARM::VLDRS:
1722  case ARM::t2LDRi8:
1723  case ARM::t2LDRBi8:
1724  case ARM::t2LDRDi8:
1725  case ARM::t2LDRSHi8:
1726  case ARM::t2LDRi12:
1727  case ARM::t2LDRBi12:
1728  case ARM::t2LDRSHi12:
1729  break;
1730  }
1731 
1732  switch (Load2->getMachineOpcode()) {
1733  default:
1734  return false;
1735  case ARM::LDRi12:
1736  case ARM::LDRBi12:
1737  case ARM::LDRD:
1738  case ARM::LDRH:
1739  case ARM::LDRSB:
1740  case ARM::LDRSH:
1741  case ARM::VLDRD:
1742  case ARM::VLDRS:
1743  case ARM::t2LDRi8:
1744  case ARM::t2LDRBi8:
1745  case ARM::t2LDRSHi8:
1746  case ARM::t2LDRi12:
1747  case ARM::t2LDRBi12:
1748  case ARM::t2LDRSHi12:
1749  break;
1750  }
1751 
1752  // Check if base addresses and chain operands match.
1753  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1754  Load1->getOperand(4) != Load2->getOperand(4))
1755  return false;
1756 
1757  // Index should be Reg0.
1758  if (Load1->getOperand(3) != Load2->getOperand(3))
1759  return false;
1760 
1761  // Determine the offsets.
1762  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1763  isa<ConstantSDNode>(Load2->getOperand(1))) {
1764  Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1765  Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1766  return true;
1767  }
1768 
1769  return false;
1770 }
1771 
1772 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1773 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1774 /// be scheduled together. On some targets if two loads are loading from
1775 /// addresses in the same cache line, it's better if they are scheduled
1776 /// together. This function takes two integers that represent the load offsets
1777 /// from the common base address. It returns true if it decides it's desirable
1778 /// to schedule the two loads together. "NumLoads" is the number of loads that
1779 /// have already been scheduled after Load1.
1780 ///
1781 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1782 /// is permanently disabled.
1783 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1784  int64_t Offset1, int64_t Offset2,
1785  unsigned NumLoads) const {
1786  // Don't worry about Thumb: just ARM and Thumb2.
1787  if (Subtarget.isThumb1Only()) return false;
1788 
1789  assert(Offset2 > Offset1);
1790 
1791  if ((Offset2 - Offset1) / 8 > 64)
1792  return false;
1793 
1794  // Check if the machine opcodes are different. If they are different
1795  // then we consider them not to be loads from the same base address,
1796  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the
1797  // other is LDRBi12. In that case they are considered to be the same because
1798  // they are different encoding forms of the same basic instruction.
1799  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1800  !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1801  Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1802  (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1803  Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1804  return false; // FIXME: overly conservative?
1805 
1806  // Four loads in a row should be sufficient.
1807  if (NumLoads >= 3)
1808  return false;
1809 
1810  return true;
1811 }
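
To make the cut-offs in shouldScheduleLoadsNear concrete, here is a minimal standalone sketch of the same heuristic in plain C++ (shouldClusterLoads is a hypothetical name, and plain integers stand in for the SDNode operands): offsets much more than 512 bytes apart are rejected, and clustering stops once three loads are already scheduled behind Load1.

#include <cassert>
#include <cstdint>
#include <iostream>

// Sketch of the proximity test above: Offset2 must not be "too far" from
// Offset1, and we stop clustering after a handful of loads.
// shouldClusterLoads is an illustrative stand-in, not an LLVM API.
static bool shouldClusterLoads(int64_t Offset1, int64_t Offset2,
                               unsigned NumLoadsAlreadyScheduled) {
  assert(Offset2 > Offset1 && "caller sorts the offsets");
  if ((Offset2 - Offset1) / 8 > 64)   // roughly more than 512 bytes apart
    return false;
  if (NumLoadsAlreadyScheduled >= 3)  // four loads in a row is enough
    return false;
  return true;
}

int main() {
  std::cout << shouldClusterLoads(0, 16, 0) << '\n';   // 1: close together
  std::cout << shouldClusterLoads(0, 4096, 0) << '\n'; // 0: too far apart
  std::cout << shouldClusterLoads(0, 16, 3) << '\n';   // 0: enough loads already
}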
1812 
1813 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1814  const MachineBasicBlock *MBB,
1815  const MachineFunction &MF) const {
1816  // Debug info is never a scheduling boundary. It's necessary to be explicit
1817  // due to the special treatment of IT instructions below; otherwise a
1818  // dbg_value followed by an IT will result in the IT instruction being
1819  // considered a scheduling hazard, which is wrong. It should be the actual
1820  // instruction preceding the dbg_value instruction(s), just like it is
1821  // when debug info is not present.
1822  if (MI.isDebugValue())
1823  return false;
1824 
1825  // Terminators and labels can't be scheduled around.
1826  if (MI.isTerminator() || MI.isPosition())
1827  return true;
1828 
1829  // Treat the start of the IT block as a scheduling boundary, but schedule
1830  // t2IT along with all instructions following it.
1831  // FIXME: This is a big hammer. But the alternative is to add all potential
1832  // true and anti dependencies to IT block instructions as implicit operands
1833  // to the t2IT instruction. The added compile time and complexity does not
1834  // seem worth it.
1835  MachineBasicBlock::const_iterator I = MI;
1836  // Make sure to skip any dbg_value instructions
1837  while (++I != MBB->end() && I->isDebugValue())
1838  ;
1839  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1840  return true;
1841 
1842  // Don't attempt to schedule around any instruction that defines
1843  // a stack-oriented pointer, as it's unlikely to be profitable. This
1844  // saves compile time, because it doesn't require every single
1845  // stack slot reference to depend on the instruction that does the
1846  // modification.
1847  // Calls don't actually change the stack pointer, even if they have imp-defs.
1848  // No ARM calling conventions change the stack pointer. (X86 calling
1849  // conventions sometimes do).
1850  if (!MI.isCall() && MI.definesRegister(ARM::SP))
1851  return true;
1852 
1853  return false;
1854 }
1855 
1856 bool ARMBaseInstrInfo::
1857 isProfitableToIfCvt(MachineBasicBlock &MBB,
1858  unsigned NumCycles, unsigned ExtraPredCycles,
1859  BranchProbability Probability) const {
1860  if (!NumCycles)
1861  return false;
1862 
1863  // If we are optimizing for size, see if the branch in the predecessor can be
1864  // lowered to cbn?z by the constant island lowering pass, and return false if
1865  // so. This results in a shorter instruction sequence.
1866  if (MBB.getParent()->getFunction().optForSize()) {
1867  MachineBasicBlock *Pred = *MBB.pred_begin();
1868  if (!Pred->empty()) {
1869  MachineInstr *LastMI = &*Pred->rbegin();
1870  if (LastMI->getOpcode() == ARM::t2Bcc) {
1871  MachineBasicBlock::iterator CmpMI = LastMI;
1872  if (CmpMI != Pred->begin()) {
1873  --CmpMI;
1874  if (CmpMI->getOpcode() == ARM::tCMPi8 ||
1875  CmpMI->getOpcode() == ARM::t2CMPri) {
1876  unsigned Reg = CmpMI->getOperand(0).getReg();
1877  unsigned PredReg = 0;
1878  ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
1879  if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
1880  isARMLowRegister(Reg))
1881  return false;
1882  }
1883  }
1884  }
1885  }
1886  }
1887  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1888  MBB, 0, 0, Probability);
1889 }
1890 
1891 bool ARMBaseInstrInfo::
1892 isProfitableToIfCvt(MachineBasicBlock &TBB,
1893  unsigned TCycles, unsigned TExtra,
1894  MachineBasicBlock &FBB,
1895  unsigned FCycles, unsigned FExtra,
1896  BranchProbability Probability) const {
1897  if (!TCycles)
1898  return false;
1899 
1900  // Attempt to estimate the relative costs of predication versus branching.
1901  // Here we scale up each component of UnpredCost to avoid precision issue when
1902  // scaling TCycles/FCycles by Probability.
1903  const unsigned ScalingUpFactor = 1024;
1904 
1905  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1906  unsigned UnpredCost;
1907  if (!Subtarget.hasBranchPredictor()) {
1908  // When we don't have a branch predictor it's always cheaper to not take a
1909  // branch than take it, so we have to take that into account.
1910  unsigned NotTakenBranchCost = 1;
1911  unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1912  unsigned TUnpredCycles, FUnpredCycles;
1913  if (!FCycles) {
1914  // Triangle: TBB is the fallthrough
1915  TUnpredCycles = TCycles + NotTakenBranchCost;
1916  FUnpredCycles = TakenBranchCost;
1917  } else {
1918  // Diamond: TBB is the block that is branched to, FBB is the fallthrough
1919  TUnpredCycles = TCycles + TakenBranchCost;
1920  FUnpredCycles = FCycles + NotTakenBranchCost;
1921  // The branch at the end of FBB will disappear when it's predicated, so
1922  // discount it from PredCost.
1923  PredCost -= 1 * ScalingUpFactor;
1924  }
1925  // The total cost is the cost of each path scaled by their probabilities.
1926  unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
1927  unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
1928  UnpredCost = TUnpredCost + FUnpredCost;
1929  // When predicating, assume that the first IT can be folded away but later
1930  // ones cost one cycle each.
1931  if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
1932  PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
1933  }
1934  } else {
1935  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
1936  unsigned FUnpredCost =
1937  Probability.getCompl().scale(FCycles * ScalingUpFactor);
1938  UnpredCost = TUnpredCost + FUnpredCost;
1939  UnpredCost += 1 * ScalingUpFactor; // The branch itself
1940  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
1941  }
1942 
1943  return PredCost <= UnpredCost;
1944 }
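
As a rough illustration of the diamond-shape cost comparison above, the following self-contained sketch replaces BranchProbability with a plain double, assumes example cycle counts, and omits the Thumb2 IT-overhead term; it is a simplified model, not the in-tree code.

#include <iostream>

// Simplified model of the diamond case above, assuming no branch predictor.
// Probability is the chance the true block is taken; MispredictPenalty stands
// in for Subtarget.getMispredictionPenalty().
static bool profitableToIfCvtDiamond(unsigned TCycles, unsigned FCycles,
                                     unsigned TExtra, unsigned FExtra,
                                     double Probability,
                                     unsigned MispredictPenalty) {
  const unsigned ScalingUpFactor = 1024;
  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;

  unsigned NotTakenBranchCost = 1;
  unsigned TakenBranchCost = MispredictPenalty;
  unsigned TUnpredCycles = TCycles + TakenBranchCost;     // branch to TBB
  unsigned FUnpredCycles = FCycles + NotTakenBranchCost;  // fall through to FBB
  PredCost -= 1 * ScalingUpFactor;  // FBB's trailing branch disappears

  unsigned TUnpredCost =
      (unsigned)(Probability * TUnpredCycles * ScalingUpFactor);
  unsigned FUnpredCost =
      (unsigned)((1.0 - Probability) * FUnpredCycles * ScalingUpFactor);
  unsigned UnpredCost = TUnpredCost + FUnpredCost;

  return PredCost <= UnpredCost;
}

int main() {
  // Two short blocks, 50/50 branch, misprediction penalty of 4 cycles:
  // predicating usually wins.
  std::cout << profitableToIfCvtDiamond(2, 2, 0, 0, 0.5, 4) << '\n';   // 1
  // Long blocks make executing both sides unattractive.
  std::cout << profitableToIfCvtDiamond(12, 12, 0, 0, 0.5, 4) << '\n'; // 0
}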
1945 
1946 bool
1947 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1948  MachineBasicBlock &FMBB) const {
1949  // Reduce false anti-dependencies to let the target's out-of-order execution
1950  // engine do its thing.
1951  return Subtarget.isProfitableToUnpredicate();
1952 }
1953 
1954 /// getInstrPredicate - If the instruction is predicated, returns its predicate
1955 /// condition, otherwise returns AL. It also returns the condition code
1956 /// register by reference.
1957 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
1958  unsigned &PredReg) {
1959  int PIdx = MI.findFirstPredOperandIdx();
1960  if (PIdx == -1) {
1961  PredReg = 0;
1962  return ARMCC::AL;
1963  }
1964 
1965  PredReg = MI.getOperand(PIdx+1).getReg();
1966  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
1967 }
1968 
1969 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
1970  if (Opc == ARM::B)
1971  return ARM::Bcc;
1972  if (Opc == ARM::tB)
1973  return ARM::tBcc;
1974  if (Opc == ARM::t2B)
1975  return ARM::t2Bcc;
1976 
1977  llvm_unreachable("Unknown unconditional branch opcode!");
1978 }
1979 
1980 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
1981  bool NewMI,
1982  unsigned OpIdx1,
1983  unsigned OpIdx2) const {
1984  switch (MI.getOpcode()) {
1985  case ARM::MOVCCr:
1986  case ARM::t2MOVCCr: {
1987  // MOVCC can be commuted by inverting the condition.
1988  unsigned PredReg = 0;
1989  ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
1990  // MOVCC AL can't be inverted. Shouldn't happen.
1991  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
1992  return nullptr;
1993  MachineInstr *CommutedMI =
1994  TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1995  if (!CommutedMI)
1996  return nullptr;
1997  // After swapping the MOVCC operands, also invert the condition.
1998  CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
1999  .setImm(ARMCC::getOppositeCondition(CC));
2000  return CommutedMI;
2001  }
2002  }
2003  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2004 }
2005 
2006 /// Identify instructions that can be folded into a MOVCC instruction, and
2007 /// return the defining instruction.
2008 static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
2009  const MachineRegisterInfo &MRI,
2010  const TargetInstrInfo *TII) {
2011  if (!TargetRegisterInfo::isVirtualRegister(Reg))
2012  return nullptr;
2013  if (!MRI.hasOneNonDBGUse(Reg))
2014  return nullptr;
2015  MachineInstr *MI = MRI.getVRegDef(Reg);
2016  if (!MI)
2017  return nullptr;
2018  // MI is folded into the MOVCC by predicating it.
2019  if (!MI->isPredicable())
2020  return nullptr;
2021  // Check if MI has any non-dead defs or physreg uses. This also detects
2022  // predicated instructions which will be reading CPSR.
2023  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
2024  const MachineOperand &MO = MI->getOperand(i);
2025  // Reject frame index operands, PEI can't handle the predicated pseudos.
2026  if (MO.isFI() || MO.isCPI() || MO.isJTI())
2027  return nullptr;
2028  if (!MO.isReg())
2029  continue;
2030  // MI can't have any tied operands, that would conflict with predication.
2031  if (MO.isTied())
2032  return nullptr;
2033  if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
2034  return nullptr;
2035  if (MO.isDef() && !MO.isDead())
2036  return nullptr;
2037  }
2038  bool DontMoveAcrossStores = true;
2039  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2040  return nullptr;
2041  return MI;
2042 }
2043 
2044 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2045  SmallVectorImpl<MachineOperand> &Cond,
2046  unsigned &TrueOp, unsigned &FalseOp,
2047  bool &Optimizable) const {
2048  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2049  "Unknown select instruction");
2050  // MOVCC operands:
2051  // 0: Def.
2052  // 1: True use.
2053  // 2: False use.
2054  // 3: Condition code.
2055  // 4: CPSR use.
2056  TrueOp = 1;
2057  FalseOp = 2;
2058  Cond.push_back(MI.getOperand(3));
2059  Cond.push_back(MI.getOperand(4));
2060  // We can always fold a def.
2061  Optimizable = true;
2062  return false;
2063 }
2064 
2065 MachineInstr *
2066 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2067  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2068  bool PreferFalse) const {
2069  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2070  "Unknown select instruction");
2071  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2072  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2073  bool Invert = !DefMI;
2074  if (!DefMI)
2075  DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2076  if (!DefMI)
2077  return nullptr;
2078 
2079  // Find new register class to use.
2080  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2081  unsigned DestReg = MI.getOperand(0).getReg();
2082  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
2083  if (!MRI.constrainRegClass(DestReg, PreviousClass))
2084  return nullptr;
2085 
2086  // Create a new predicated version of DefMI.
2087  // Rfalse is the first use.
2088  MachineInstrBuilder NewMI =
2089  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2090 
2091  // Copy all the DefMI operands, excluding its (null) predicate.
2092  const MCInstrDesc &DefDesc = DefMI->getDesc();
2093  for (unsigned i = 1, e = DefDesc.getNumOperands();
2094  i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2095  NewMI.add(DefMI->getOperand(i));
2096 
2097  unsigned CondCode = MI.getOperand(3).getImm();
2098  if (Invert)
2099  NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2100  else
2101  NewMI.addImm(CondCode);
2102  NewMI.add(MI.getOperand(4));
2103 
2104  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2105  if (NewMI->hasOptionalDef())
2106  NewMI.add(condCodeOp());
2107 
2108  // The output register value when the predicate is false is an implicit
2109  // register operand tied to the first def.
2110  // The tie makes the register allocator ensure the FalseReg is allocated the
2111  // same register as operand 0.
2112  FalseReg.setImplicit();
2113  NewMI.add(FalseReg);
2114  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2115 
2116  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2117  SeenMIs.insert(NewMI);
2118  SeenMIs.erase(DefMI);
2119 
2120  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2121  // DefMI would be invalid when transferred inside the loop. Checking for a
2122  // loop is expensive, but at least remove kill flags if they are in different
2123  // BBs.
2124  if (DefMI->getParent() != MI.getParent())
2125  NewMI->clearKillInfo();
2126 
2127  // The caller will erase MI, but not DefMI.
2128  DefMI->eraseFromParent();
2129  return NewMI;
2130 }
2131 
2132 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2133 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2134 /// def operand.
2135 ///
2136 /// This will go away once we can teach tblgen how to set the optional CPSR def
2137 /// operand itself.
2138 struct AddSubFlagsOpcodePair {
2139  uint16_t PseudoOpc;
2140  uint16_t MachineOpc;
2141 };
2142 
2143 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2144  {ARM::ADDSri, ARM::ADDri},
2145  {ARM::ADDSrr, ARM::ADDrr},
2146  {ARM::ADDSrsi, ARM::ADDrsi},
2147  {ARM::ADDSrsr, ARM::ADDrsr},
2148 
2149  {ARM::SUBSri, ARM::SUBri},
2150  {ARM::SUBSrr, ARM::SUBrr},
2151  {ARM::SUBSrsi, ARM::SUBrsi},
2152  {ARM::SUBSrsr, ARM::SUBrsr},
2153 
2154  {ARM::RSBSri, ARM::RSBri},
2155  {ARM::RSBSrsi, ARM::RSBrsi},
2156  {ARM::RSBSrsr, ARM::RSBrsr},
2157 
2158  {ARM::tADDSi3, ARM::tADDi3},
2159  {ARM::tADDSi8, ARM::tADDi8},
2160  {ARM::tADDSrr, ARM::tADDrr},
2161  {ARM::tADCS, ARM::tADC},
2162 
2163  {ARM::tSUBSi3, ARM::tSUBi3},
2164  {ARM::tSUBSi8, ARM::tSUBi8},
2165  {ARM::tSUBSrr, ARM::tSUBrr},
2166  {ARM::tSBCS, ARM::tSBC},
2167 
2168  {ARM::t2ADDSri, ARM::t2ADDri},
2169  {ARM::t2ADDSrr, ARM::t2ADDrr},
2170  {ARM::t2ADDSrs, ARM::t2ADDrs},
2171 
2172  {ARM::t2SUBSri, ARM::t2SUBri},
2173  {ARM::t2SUBSrr, ARM::t2SUBrr},
2174  {ARM::t2SUBSrs, ARM::t2SUBrs},
2175 
2176  {ARM::t2RSBSri, ARM::t2RSBri},
2177  {ARM::t2RSBSrs, ARM::t2RSBrs},
2178 };
2179 
2180 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2181  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2182  if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2183  return AddSubFlagsOpcodeMap[i].MachineOpc;
2184  return 0;
2185 }
2186 
2187 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2188  MachineBasicBlock::iterator &MBBI,
2189  const DebugLoc &dl, unsigned DestReg,
2190  unsigned BaseReg, int NumBytes,
2191  ARMCC::CondCodes Pred, unsigned PredReg,
2192  const ARMBaseInstrInfo &TII,
2193  unsigned MIFlags) {
2194  if (NumBytes == 0 && DestReg != BaseReg) {
2195  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2196  .addReg(BaseReg, RegState::Kill)
2197  .add(predOps(Pred, PredReg))
2198  .add(condCodeOp())
2199  .setMIFlags(MIFlags);
2200  return;
2201  }
2202 
2203  bool isSub = NumBytes < 0;
2204  if (isSub) NumBytes = -NumBytes;
2205 
2206  while (NumBytes) {
2207  unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2208  unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2209  assert(ThisVal && "Didn't extract field correctly");
2210 
2211  // We will handle these bits from offset, clear them.
2212  NumBytes &= ~ThisVal;
2213 
2214  assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2215 
2216  // Build the new ADD / SUB.
2217  unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2218  BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2219  .addReg(BaseReg, RegState::Kill)
2220  .addImm(ThisVal)
2221  .add(predOps(Pred, PredReg))
2222  .add(condCodeOp())
2223  .setMIFlags(MIFlags);
2224  BaseReg = DestReg;
2225  }
2226 }
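
The loop above peels one ARM SO immediate (an 8-bit value rotated right by an even amount) off NumBytes per iteration. A standalone sketch of that decomposition, with simplified stand-ins for ARM_AM::rotr32 and ARM_AM::getSOImmValRotate, is:

#include <cstdint>
#include <cstdio>

// Rotate a 32-bit value right by Amt (0..31).
static uint32_t rotr32(uint32_t V, unsigned Amt) {
  return Amt == 0 ? V : (V >> Amt) | (V << (32 - Amt));
}

// Find an even rotate-right amount that places an 8-bit window over the
// lowest set bits of Imm, mirroring the idea behind getSOImmValRotate.
// Illustrative stand-in, not the in-tree helper.
static unsigned soImmValRotate(uint32_t Imm) {
  unsigned TZ = 0;
  while (!(Imm & (1u << TZ)))
    ++TZ;
  TZ &= ~1u;              // SO-immediate rotations are always even
  return (32 - TZ) & 31;  // rotate-right amount mapping bit TZ to bit 0
}

int main() {
  uint32_t NumBytes = 0x12345678; // too wide for a single ADDri
  unsigned Adds = 0;
  while (NumBytes) {
    unsigned Rot = soImmValRotate(NumBytes);
    uint32_t Chunk = NumBytes & rotr32(0xFFu, Rot); // one legal SO immediate
    std::printf("  add ..., #0x%08x\n", Chunk);
    NumBytes &= ~Chunk; // these bits are now handled
    ++Adds;
  }
  std::printf("%u ADDs needed\n", Adds); // 4 for this value
}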
2227 
2228 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2229  MachineFunction &MF, MachineInstr *MI,
2230  unsigned NumBytes) {
2231  // This optimisation potentially adds lots of load and store
2232  // micro-operations, so it's only really a benefit for code size.
2233  if (!MF.getFunction().optForMinSize())
2234  return false;
2235 
2236  // If only one register is pushed/popped, LLVM can use an LDR/STR
2237  // instead. We can't modify those so make sure we're dealing with an
2238  // instruction we understand.
2239  bool IsPop = isPopOpcode(MI->getOpcode());
2240  bool IsPush = isPushOpcode(MI->getOpcode());
2241  if (!IsPush && !IsPop)
2242  return false;
2243 
2244  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2245  MI->getOpcode() == ARM::VLDMDIA_UPD;
2246  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2247  MI->getOpcode() == ARM::tPOP ||
2248  MI->getOpcode() == ARM::tPOP_RET;
2249 
2250  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2251  MI->getOperand(1).getReg() == ARM::SP)) &&
2252  "trying to fold sp update into non-sp-updating push/pop");
2253 
2254  // The VFP push & pop act on D-registers, so we can only correctly fold in an
2255  // adjustment that is a multiple of 8 bytes. Similarly, each rN is 4 bytes.
2256  // Don't try if this is violated.
2257  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2258  return false;
2259 
2260  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2261  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2262  int RegListIdx = IsT1PushPop ? 2 : 4;
2263 
2264  // Calculate the space we'll need in terms of registers.
2265  unsigned RegsNeeded;
2266  const TargetRegisterClass *RegClass;
2267  if (IsVFPPushPop) {
2268  RegsNeeded = NumBytes / 8;
2269  RegClass = &ARM::DPRRegClass;
2270  } else {
2271  RegsNeeded = NumBytes / 4;
2272  RegClass = &ARM::GPRRegClass;
2273  }
2274 
2275  // We're going to have to strip all list operands off before
2276  // re-adding them since the order matters, so save the existing ones
2277  // for later.
2278  SmallVector<MachineOperand, 4> RegList;
2279 
2280  // We're also going to need the first register transferred by this
2281  // instruction, which won't necessarily be the first register in the list.
2282  unsigned FirstRegEnc = -1;
2283 
2285  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2286  MachineOperand &MO = MI->getOperand(i);
2287  RegList.push_back(MO);
2288 
2289  if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2290  FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2291  }
2292 
2293  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2294 
2295  // Now try to find enough space in the reglist to allocate NumBytes.
2296  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2297  --CurRegEnc) {
2298  unsigned CurReg = RegClass->getRegister(CurRegEnc);
2299  if (!IsPop) {
2300  // Pushing any register is completely harmless; mark the register involved
2301  // as undef since we don't care about its value and must not restore it
2302  // during stack unwinding.
2303  RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2304  false, false, true));
2305  --RegsNeeded;
2306  continue;
2307  }
2308 
2309  // However, we can only pop an extra register if it's not live. For
2310  // registers live within the function we might clobber a return value
2311  // register; the other way a register can be live here is if it's
2312  // callee-saved.
2313  if (isCalleeSavedRegister(CurReg, CSRegs) ||
2314  MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2315  MachineBasicBlock::LQR_Dead) {
2316  // VFP pops don't allow holes in the register list, so any skip is fatal
2317  // for our transformation. GPR pops do, so we should just keep looking.
2318  if (IsVFPPushPop)
2319  return false;
2320  else
2321  continue;
2322  }
2323 
2324  // Mark the unimportant registers as <def,dead> in the POP.
2325  RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2326  true));
2327  --RegsNeeded;
2328  }
2329 
2330  if (RegsNeeded > 0)
2331  return false;
2332 
2333  // Finally we know we can profitably perform the optimisation so go
2334  // ahead: strip all existing registers off and add them back again
2335  // in the right order.
2336  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2337  MI->RemoveOperand(i);
2338 
2339  // Add the complete list back in.
2340  MachineInstrBuilder MIB(MF, &*MI);
2341  for (int i = RegList.size() - 1; i >= 0; --i)
2342  MIB.add(RegList[i]);
2343 
2344  return true;
2345 }
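
A small sketch of the sizing rule used above (extraRegsForSPAdjust is a hypothetical helper, not an LLVM API): the adjustment must be a whole number of 8-byte D-register slots for a VFP push/pop or 4-byte GPR slots otherwise, and the quotient is how many extra registers have to be found in the list.

#include <iostream>

// Returns how many scratch registers must be added to the push/pop register
// list to absorb an SP adjustment of NumBytes, or -1 if it cannot be folded.
static int extraRegsForSPAdjust(unsigned NumBytes, bool IsVFPPushPop) {
  unsigned SlotSize = IsVFPPushPop ? 8 : 4;
  if (NumBytes % SlotSize != 0)
    return -1; // can't fold this adjustment
  return NumBytes / SlotSize;
}

int main() {
  std::cout << extraRegsForSPAdjust(8, false) << '\n';  // 2 GPRs
  std::cout << extraRegsForSPAdjust(16, true) << '\n';  // 2 D registers
  std::cout << extraRegsForSPAdjust(12, true) << '\n';  // -1: not a multiple of 8
}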
2346 
2347 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2348  unsigned FrameReg, int &Offset,
2349  const ARMBaseInstrInfo &TII) {
2350  unsigned Opcode = MI.getOpcode();
2351  const MCInstrDesc &Desc = MI.getDesc();
2352  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2353  bool isSub = false;
2354 
2355  // Memory operands in inline assembly always use AddrMode2.
2356  if (Opcode == ARM::INLINEASM)
2357  AddrMode = ARMII::AddrMode2;
2358 
2359  if (Opcode == ARM::ADDri) {
2360  Offset += MI.getOperand(FrameRegIdx+1).getImm();
2361  if (Offset == 0) {
2362  // Turn it into a move.
2363  MI.setDesc(TII.get(ARM::MOVr));
2364  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2365  MI.RemoveOperand(FrameRegIdx+1);
2366  Offset = 0;
2367  return true;
2368  } else if (Offset < 0) {
2369  Offset = -Offset;
2370  isSub = true;
2371  MI.setDesc(TII.get(ARM::SUBri));
2372  }
2373 
2374  // Common case: small offset, fits into instruction.
2375  if (ARM_AM::getSOImmVal(Offset) != -1) {
2376  // Replace the FrameIndex with sp / fp
2377  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2378  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2379  Offset = 0;
2380  return true;
2381  }
2382 
2383  // Otherwise, pull as much of the immediate into this ADDri/SUBri
2384  // as possible.
2385  unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2386  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2387 
2388  // We will handle these bits from offset, clear them.
2389  Offset &= ~ThisImmVal;
2390 
2391  // Get the properly encoded SOImmVal field.
2392  assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2393  "Bit extraction didn't work?");
2394  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2395  } else {
2396  unsigned ImmIdx = 0;
2397  int InstrOffs = 0;
2398  unsigned NumBits = 0;
2399  unsigned Scale = 1;
2400  switch (AddrMode) {
2401  case ARMII::AddrMode_i12:
2402  ImmIdx = FrameRegIdx + 1;
2403  InstrOffs = MI.getOperand(ImmIdx).getImm();
2404  NumBits = 12;
2405  break;
2406  case ARMII::AddrMode2:
2407  ImmIdx = FrameRegIdx+2;
2408  InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2409  if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2410  InstrOffs *= -1;
2411  NumBits = 12;
2412  break;
2413  case ARMII::AddrMode3:
2414  ImmIdx = FrameRegIdx+2;
2415  InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2416  if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2417  InstrOffs *= -1;
2418  NumBits = 8;
2419  break;
2420  case ARMII::AddrMode4:
2421  case ARMII::AddrMode6:
2422  // Can't fold any offset even if it's zero.
2423  return false;
2424  case ARMII::AddrMode5:
2425  ImmIdx = FrameRegIdx+1;
2426  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2427  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2428  InstrOffs *= -1;
2429  NumBits = 8;
2430  Scale = 4;
2431  break;
2432  case ARMII::AddrMode5FP16:
2433  ImmIdx = FrameRegIdx+1;
2434  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2435  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2436  InstrOffs *= -1;
2437  NumBits = 8;
2438  Scale = 2;
2439  break;
2440  default:
2441  llvm_unreachable("Unsupported addressing mode!");
2442  }
2443 
2444  Offset += InstrOffs * Scale;
2445  assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2446  if (Offset < 0) {
2447  Offset = -Offset;
2448  isSub = true;
2449  }
2450 
2451  // Attempt to fold the address computation if the opcode has offset bits.
2452  if (NumBits > 0) {
2453  // Common case: small offset, fits into instruction.
2454  MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2455  int ImmedOffset = Offset / Scale;
2456  unsigned Mask = (1 << NumBits) - 1;
2457  if ((unsigned)Offset <= Mask * Scale) {
2458  // Replace the FrameIndex with sp
2459  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2460  // FIXME: When addrmode2 goes away, this will simplify (like the
2461  // T2 version), as the LDR.i12 versions don't need the encoding
2462  // tricks for the offset value.
2463  if (isSub) {
2464  if (AddrMode == ARMII::AddrMode_i12)
2465  ImmedOffset = -ImmedOffset;
2466  else
2467  ImmedOffset |= 1 << NumBits;
2468  }
2469  ImmOp.ChangeToImmediate(ImmedOffset);
2470  Offset = 0;
2471  return true;
2472  }
2473 
2474  // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2475  ImmedOffset = ImmedOffset & Mask;
2476  if (isSub) {
2477  if (AddrMode == ARMII::AddrMode_i12)
2478  ImmedOffset = -ImmedOffset;
2479  else
2480  ImmedOffset |= 1 << NumBits;
2481  }
2482  ImmOp.ChangeToImmediate(ImmedOffset);
2483  Offset &= ~(Mask*Scale);
2484  }
2485  }
2486 
2487  Offset = (isSub) ? -Offset : Offset;
2488  return Offset == 0;
2489 }
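
For the non-ADDri addressing modes above, how much of the offset can be folded is limited by the width (NumBits) and scaling (Scale) of the immediate field. A simplified standalone sketch of that arithmetic, ignoring the add/sub direction flag and using illustrative names, is:

#include <iostream>

// Split an offset into the part that fits an addressing-mode immediate field
// of NumBits (scaled by Scale) and the remainder the caller must materialise.
struct FoldResult {
  unsigned Encoded;   // immediate placed in the instruction
  unsigned Leftover;  // offset still left over
};

static FoldResult foldOffset(unsigned Offset, unsigned NumBits, unsigned Scale) {
  unsigned Mask = (1u << NumBits) - 1;
  unsigned Encoded = (Offset / Scale) & Mask;    // what fits in the field
  unsigned Leftover = Offset - Encoded * Scale;  // what doesn't
  return {Encoded, Leftover};
}

int main() {
  FoldResult A = foldOffset(4095, 12, 1); // AddrMode_i12: fits entirely
  FoldResult B = foldOffset(5000, 12, 1); // AddrMode_i12: partial fold
  FoldResult C = foldOffset(1020, 8, 4);  // AddrMode5: 8-bit word offset
  std::cout << A.Encoded << ' ' << A.Leftover << '\n'; // 4095 0
  std::cout << B.Encoded << ' ' << B.Leftover << '\n'; // 904 4096
  std::cout << C.Encoded << ' ' << C.Leftover << '\n'; // 255 0
}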
2490 
2491 /// analyzeCompare - For a comparison instruction, return the source registers
2492 /// in SrcReg and SrcReg2 if it has two register operands, and the value it
2493 /// compares against in CmpValue. Return true if the comparison instruction
2494 /// can be analyzed.
2495 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
2496  unsigned &SrcReg2, int &CmpMask,
2497  int &CmpValue) const {
2498  switch (MI.getOpcode()) {
2499  default: break;
2500  case ARM::CMPri:
2501  case ARM::t2CMPri:
2502  case ARM::tCMPi8:
2503  SrcReg = MI.getOperand(0).getReg();
2504  SrcReg2 = 0;
2505  CmpMask = ~0;
2506  CmpValue = MI.getOperand(1).getImm();
2507  return true;
2508  case ARM::CMPrr:
2509  case ARM::t2CMPrr:
2510  SrcReg = MI.getOperand(0).getReg();
2511  SrcReg2 = MI.getOperand(1).getReg();
2512  CmpMask = ~0;
2513  CmpValue = 0;
2514  return true;
2515  case ARM::TSTri:
2516  case ARM::t2TSTri:
2517  SrcReg = MI.getOperand(0).getReg();
2518  SrcReg2 = 0;
2519  CmpMask = MI.getOperand(1).getImm();
2520  CmpValue = 0;
2521  return true;
2522  }
2523 
2524  return false;
2525 }
2526 
2527 /// isSuitableForMask - Identify a suitable 'and' instruction that
2528 /// operates on the given source register and applies the same mask
2529 /// as a 'tst' instruction. Provide a limited look-through for copies.
2530 /// When successful, MI will hold the found instruction.
2531 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2532  int CmpMask, bool CommonUse) {
2533  switch (MI->getOpcode()) {
2534  case ARM::ANDri:
2535  case ARM::t2ANDri:
2536  if (CmpMask != MI->getOperand(2).getImm())
2537  return false;
2538  if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2539  return true;
2540  break;
2541  }
2542 
2543  return false;
2544 }
2545 
2546 /// getSwappedCondition - assume the flags are set by MI(a,b), return
2547 /// the condition code if we modify the instructions such that flags are
2548 /// set by MI(b,a).
2549 ARMCC::CondCodes llvm::getSwappedCondition(ARMCC::CondCodes CC) {
2550  switch (CC) {
2551  default: return ARMCC::AL;
2552  case ARMCC::EQ: return ARMCC::EQ;
2553  case ARMCC::NE: return ARMCC::NE;
2554  case ARMCC::HS: return ARMCC::LS;
2555  case ARMCC::LO: return ARMCC::HI;
2556  case ARMCC::HI: return ARMCC::LO;
2557  case ARMCC::LS: return ARMCC::HS;
2558  case ARMCC::GE: return ARMCC::LE;
2559  case ARMCC::LT: return ARMCC::GT;
2560  case ARMCC::GT: return ARMCC::LT;
2561  case ARMCC::LE: return ARMCC::GE;
2562  }
2563 }
2564 
2565 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2566 /// the condition code if we modify the instructions such that flags are
2567 /// set by ADD(a,b,X).
2568 static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2569  switch (CC) {
2570  default: return ARMCC::AL;
2571  case ARMCC::HS: return ARMCC::LO;
2572  case ARMCC::LO: return ARMCC::HS;
2573  case ARMCC::VS: return ARMCC::VS;
2574  case ARMCC::VC: return ARMCC::VC;
2575  }
2576 }
2577 
2578 /// isRedundantFlagInstr - check whether the first instruction, whose only
2579 /// purpose is to update flags, can be made redundant.
2580 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2581 /// CMPri can be made redundant by SUBri if the operands are the same.
2582 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2583 /// This function can be extended later on.
2584 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2585  unsigned SrcReg, unsigned SrcReg2,
2586  int ImmValue, const MachineInstr *OI) {
2587  if ((CmpI->getOpcode() == ARM::CMPrr ||
2588  CmpI->getOpcode() == ARM::t2CMPrr) &&
2589  (OI->getOpcode() == ARM::SUBrr ||
2590  OI->getOpcode() == ARM::t2SUBrr) &&
2591  ((OI->getOperand(1).getReg() == SrcReg &&
2592  OI->getOperand(2).getReg() == SrcReg2) ||
2593  (OI->getOperand(1).getReg() == SrcReg2 &&
2594  OI->getOperand(2).getReg() == SrcReg)))
2595  return true;
2596 
2597  if ((CmpI->getOpcode() == ARM::CMPri ||
2598  CmpI->getOpcode() == ARM::t2CMPri) &&
2599  (OI->getOpcode() == ARM::SUBri ||
2600  OI->getOpcode() == ARM::t2SUBri) &&
2601  OI->getOperand(1).getReg() == SrcReg &&
2602  OI->getOperand(2).getImm() == ImmValue)
2603  return true;
2604 
2605  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2606  (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2607  OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2608  OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2609  OI->getOperand(0).getReg() == SrcReg &&
2610  OI->getOperand(1).getReg() == SrcReg2)
2611  return true;
2612  return false;
2613 }
2614 
2615 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2616  switch (MI->getOpcode()) {
2617  default: return false;
2618  case ARM::tLSLri:
2619  case ARM::tLSRri:
2620  case ARM::tLSLrr:
2621  case ARM::tLSRrr:
2622  case ARM::tSUBrr:
2623  case ARM::tADDrr:
2624  case ARM::tADDi3:
2625  case ARM::tADDi8:
2626  case ARM::tSUBi3:
2627  case ARM::tSUBi8:
2628  case ARM::tMUL:
2629  IsThumb1 = true;
2630  LLVM_FALLTHROUGH;
2631  case ARM::RSBrr:
2632  case ARM::RSBri:
2633  case ARM::RSCrr:
2634  case ARM::RSCri:
2635  case ARM::ADDrr:
2636  case ARM::ADDri:
2637  case ARM::ADCrr:
2638  case ARM::ADCri:
2639  case ARM::SUBrr:
2640  case ARM::SUBri:
2641  case ARM::SBCrr:
2642  case ARM::SBCri:
2643  case ARM::t2RSBri:
2644  case ARM::t2ADDrr:
2645  case ARM::t2ADDri:
2646  case ARM::t2ADCrr:
2647  case ARM::t2ADCri:
2648  case ARM::t2SUBrr:
2649  case ARM::t2SUBri:
2650  case ARM::t2SBCrr:
2651  case ARM::t2SBCri:
2652  case ARM::ANDrr:
2653  case ARM::ANDri:
2654  case ARM::t2ANDrr:
2655  case ARM::t2ANDri:
2656  case ARM::ORRrr:
2657  case ARM::ORRri:
2658  case ARM::t2ORRrr:
2659  case ARM::t2ORRri:
2660  case ARM::EORrr:
2661  case ARM::EORri:
2662  case ARM::t2EORrr:
2663  case ARM::t2EORri:
2664  case ARM::t2LSRri:
2665  case ARM::t2LSRrr:
2666  case ARM::t2LSLri:
2667  case ARM::t2LSLrr:
2668  return true;
2669  }
2670 }
2671 
2672 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2673 /// comparison into one that sets the zero bit in the flags register;
2674 /// remove a redundant compare instruction if an earlier instruction can set
2675 /// the flags in the same way as the compare.
2676 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2677 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2678 /// condition code of instructions which use the flags.
2679 bool ARMBaseInstrInfo::optimizeCompareInstr(
2680  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
2681  int CmpValue, const MachineRegisterInfo *MRI) const {
2682  // Get the unique definition of SrcReg.
2683  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2684  if (!MI) return false;
2685 
2686  // Masked compares sometimes use the same register as the corresponding 'and'.
2687  if (CmpMask != ~0) {
2688  if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2689  MI = nullptr;
2690  for (MachineRegisterInfo::use_instr_iterator
2691  UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2692  UI != UE; ++UI) {
2693  if (UI->getParent() != CmpInstr.getParent())
2694  continue;
2695  MachineInstr *PotentialAND = &*UI;
2696  if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2697  isPredicated(*PotentialAND))
2698  continue;
2699  MI = PotentialAND;
2700  break;
2701  }
2702  if (!MI) return false;
2703  }
2704  }
2705 
2706  // Get ready to iterate backward from CmpInstr.
2707  MachineBasicBlock::iterator I = CmpInstr, E = MI,
2708  B = CmpInstr.getParent()->begin();
2709 
2710  // Early exit if CmpInstr is at the beginning of the BB.
2711  if (I == B) return false;
2712 
2713  // There are two possible candidates which can be changed to set CPSR:
2714  // One is MI, the other is a SUB or ADD instruction.
2715  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2716  // ADDr[ri](r1, r2, X).
2717  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2718  MachineInstr *SubAdd = nullptr;
2719  if (SrcReg2 != 0)
2720  // MI is not a candidate for CMPrr.
2721  MI = nullptr;
2722  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2723  // Conservatively refuse to convert an instruction which isn't in the same
2724  // BB as the comparison.
2725  // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2726  // Thus we cannot return here.
2727  if (CmpInstr.getOpcode() == ARM::CMPri ||
2728  CmpInstr.getOpcode() == ARM::t2CMPri)
2729  MI = nullptr;
2730  else
2731  return false;
2732  }
2733 
2734  bool IsThumb1 = false;
2735  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2736  return false;
2737 
2738  // We also want to do this peephole for cases like this: if (a*b == 0),
2739  // and optimise away the CMP instruction from the generated code sequence:
2740  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2741  // resulting from the select instruction, but these MOVS instructions for
2742  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2743  // However, if we only have MOVS instructions in between the CMP and the
2744  // other instruction (the MULS in this example), then the CPSR is dead so we
2745  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2746  // reordering and then continue the analysis hoping we can eliminate the
2747  // CMP. This peephole works on the vregs, so is still in SSA form. As a
2748  // consequence, the movs won't redefine/kill the MUL operands which would
2749  // make this reordering illegal.
2750  if (MI && IsThumb1) {
2751  --I;
2752  bool CanReorder = true;
2753  const bool HasStmts = I != E;
2754  for (; I != E; --I) {
2755  if (I->getOpcode() != ARM::tMOVi8) {
2756  CanReorder = false;
2757  break;
2758  }
2759  }
2760  if (HasStmts && CanReorder) {
2761  MI = MI->removeFromParent();
2762  E = CmpInstr;
2763  CmpInstr.getParent()->insert(E, MI);
2764  }
2765  I = CmpInstr;
2766  E = MI;
2767  }
2768 
2769  // Check that CPSR isn't set between the comparison instruction and the one we
2770  // want to change. At the same time, search for SubAdd.
2771  const TargetRegisterInfo *TRI = &getRegisterInfo();
2772  do {
2773  const MachineInstr &Instr = *--I;
2774 
2775  // Check whether CmpInstr can be made redundant by the current instruction.
2776  if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
2777  SubAdd = &*I;
2778  break;
2779  }
2780 
2781  // Allow E (which was initially MI) to be SubAdd but do not search before E.
2782  if (I == E)
2783  break;
2784 
2785  if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2786  Instr.readsRegister(ARM::CPSR, TRI))
2787  // This instruction modifies or uses CPSR after the one we want to
2788  // change. We can't do this transformation.
2789  return false;
2790 
2791  } while (I != B);
2792 
2793  // Return false if no candidates exist.
2794  if (!MI && !SubAdd)
2795  return false;
2796 
2797  // The single candidate is called MI.
2798  if (!MI) MI = SubAdd;
2799 
2800  // We can't use a predicated instruction - it doesn't always write the flags.
2801  if (isPredicated(*MI))
2802  return false;
2803 
2804  // Scan forward for the use of CPSR
2805  // When checking against MI: if it's a condition code that requires
2806  // checking of the V bit or C bit, then this is not safe to do.
2807  // It is safe to remove CmpInstr if CPSR is redefined or killed.
2808  // If we are done with the basic block, we need to check whether CPSR is
2809  // live-out.
2810  SmallVector<std::pair<MachineOperand *, ARMCC::CondCodes>, 4>
2811  OperandsToUpdate;
2812  bool isSafe = false;
2813  I = CmpInstr;
2814  E = CmpInstr.getParent()->end();
2815  while (!isSafe && ++I != E) {
2816  const MachineInstr &Instr = *I;
2817  for (unsigned IO = 0, EO = Instr.getNumOperands();
2818  !isSafe && IO != EO; ++IO) {
2819  const MachineOperand &MO = Instr.getOperand(IO);
2820  if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2821  isSafe = true;
2822  break;
2823  }
2824  if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2825  continue;
2826  if (MO.isDef()) {
2827  isSafe = true;
2828  break;
2829  }
2830  // Condition code is after the operand before CPSR except for VSELs.
2831  ARMCC::CondCodes CC;
2832  bool IsInstrVSel = true;
2833  switch (Instr.getOpcode()) {
2834  default:
2835  IsInstrVSel = false;
2836  CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
2837  break;
2838  case ARM::VSELEQD:
2839  case ARM::VSELEQS:
2840  CC = ARMCC::EQ;
2841  break;
2842  case ARM::VSELGTD:
2843  case ARM::VSELGTS:
2844  CC = ARMCC::GT;
2845  break;
2846  case ARM::VSELGED:
2847  case ARM::VSELGES:
2848  CC = ARMCC::GE;
2849  break;
2850  case ARM::VSELVSS:
2851  case ARM::VSELVSD:
2852  CC = ARMCC::VS;
2853  break;
2854  }
2855 
2856  if (SubAdd) {
2857  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2858  // on CMP needs to be updated to be based on SUB.
2859  // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
2860  // needs to be modified.
2861  // Push the condition code operands to OperandsToUpdate.
2862  // If it is safe to remove CmpInstr, the condition code of these
2863  // operands will be modified.
2864  unsigned Opc = SubAdd->getOpcode();
2865  bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
2866  Opc == ARM::SUBri || Opc == ARM::t2SUBri;
2867  if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
2868  SubAdd->getOperand(2).getReg() == SrcReg)) {
2869  // VSel doesn't support condition code update.
2870  if (IsInstrVSel)
2871  return false;
2872  // Ensure we can swap the condition.
2873  ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
2874  if (NewCC == ARMCC::AL)
2875  return false;
2876  OperandsToUpdate.push_back(
2877  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
2878  }
2879  } else {
2880  // No SubAdd, so this is x = <op> y, z; cmp x, 0.
2881  switch (CC) {
2882  case ARMCC::EQ: // Z
2883  case ARMCC::NE: // Z
2884  case ARMCC::MI: // N
2885  case ARMCC::PL: // N
2886  case ARMCC::AL: // none
2887  // CPSR can be used multiple times, we should continue.
2888  break;
2889  case ARMCC::HS: // C
2890  case ARMCC::LO: // C
2891  case ARMCC::VS: // V
2892  case ARMCC::VC: // V
2893  case ARMCC::HI: // C Z
2894  case ARMCC::LS: // C Z
2895  case ARMCC::GE: // N V
2896  case ARMCC::LT: // N V
2897  case ARMCC::GT: // Z N V
2898  case ARMCC::LE: // Z N V
2899  // The instruction uses the V bit or C bit which is not safe.
2900  return false;
2901  }
2902  }
2903  }
2904  }
2905 
2906  // If CPSR is not killed nor re-defined, we should check whether it is
2907  // live-out. If it is live-out, do not optimize.
2908  if (!isSafe) {
2909  MachineBasicBlock *MBB = CmpInstr.getParent();
2910  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2911  SE = MBB->succ_end(); SI != SE; ++SI)
2912  if ((*SI)->isLiveIn(ARM::CPSR))
2913  return false;
2914  }
2915 
2916  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
2917  // set CPSR so this is represented as an explicit output)
2918  if (!IsThumb1) {
2919  MI->getOperand(5).setReg(ARM::CPSR);
2920  MI->getOperand(5).setIsDef(true);
2921  }
2922  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
2923  CmpInstr.eraseFromParent();
2924 
2925  // Modify the condition code of operands in OperandsToUpdate.
2926  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2927  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2928  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
2929  OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
2930 
2931  return true;
2932 }
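
The operand-swap case handled above relies on the fact that every condition tested after CMP(b, a) has an equivalent condition after SUBS(a, b), which is the mapping getSwappedCondition implements. A minimal standalone illustration for the GE/LE pair, with the ARM condition tests written out in plain C++, is:

#include <cstdint>
#include <initializer_list>
#include <iostream>

// NZCV flags as a subtraction A - B would set them on ARM.
struct Flags { bool N, Z, C, V; };

static Flags flagsFromSub(uint32_t A, uint32_t B) {
  uint32_t R = A - B;
  return { (R >> 31) != 0,                     // N: result negative
           R == 0,                             // Z: result zero
           A >= B,                             // C: no borrow
           (((A ^ B) & (A ^ R)) >> 31) != 0 }; // V: signed overflow
}

// ARM condition tests on the flags.
static bool condGE(Flags F) { return F.N == F.V; }
static bool condLE(Flags F) { return F.Z || F.N != F.V; }

int main() {
  for (int32_t a : {-3, 0, 4})
    for (int32_t b : {-7, 0, 4}) {
      // "bge" after CMP b, a  is equivalent to  "ble" after SUBS ..., a, b.
      bool geOnSwapped = condGE(flagsFromSub((uint32_t)b, (uint32_t)a));
      bool leOnOriginal = condLE(flagsFromSub((uint32_t)a, (uint32_t)b));
      std::cout << (geOnSwapped == leOnOriginal); // always 1
    }
  std::cout << '\n';
}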
2933 
2934 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
2935  // Do not sink MI if it might be used to optimize a redundant compare.
2936  // We heuristically only look at the instruction immediately following MI to
2937  // avoid potentially searching the entire basic block.
2938  if (isPredicated(MI))
2939  return true;
2940  MachineBasicBlock::const_iterator Next = &MI;
2941  ++Next;
2942  unsigned SrcReg, SrcReg2;
2943  int CmpMask, CmpValue;
2944  if (Next != MI.getParent()->end() &&
2945  analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
2946  isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
2947  return false;
2948  return true;
2949 }
2950 
2951 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2952  unsigned Reg,
2953  MachineRegisterInfo *MRI) const {
2954  // Fold large immediates into add, sub, or, xor.
2955  unsigned DefOpc = DefMI.getOpcode();
2956  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
2957  return false;
2958  if (!DefMI.getOperand(1).isImm())
2959  // Could be t2MOVi32imm @xx
2960  return false;
2961 
2962  if (!MRI->hasOneNonDBGUse(Reg))
2963  return false;
2964 
2965  const MCInstrDesc &DefMCID = DefMI.getDesc();
2966  if (DefMCID.hasOptionalDef()) {
2967  unsigned NumOps = DefMCID.getNumOperands();
2968  const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
2969  if (MO.getReg() == ARM::CPSR && !MO.isDead())
2970  // If DefMI defines CPSR and it is not dead, it's obviously not safe
2971  // to delete DefMI.
2972  return false;
2973  }
2974 
2975  const MCInstrDesc &UseMCID = UseMI.getDesc();
2976  if (UseMCID.hasOptionalDef()) {
2977  unsigned NumOps = UseMCID.getNumOperands();
2978  if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
2979  // If the instruction sets the flag, do not attempt this optimization
2980  // since it may change the semantics of the code.
2981  return false;
2982  }
2983 
2984  unsigned UseOpc = UseMI.getOpcode();
2985  unsigned NewUseOpc = 0;
2986  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
2987  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
2988  bool Commute = false;
2989  switch (UseOpc) {
2990  default: return false;
2991  case ARM::SUBrr:
2992  case ARM::ADDrr:
2993  case ARM::ORRrr:
2994  case ARM::EORrr:
2995  case ARM::t2SUBrr:
2996  case ARM::t2ADDrr:
2997  case ARM::t2ORRrr:
2998  case ARM::t2EORrr: {
2999  Commute = UseMI.getOperand(2).getReg() != Reg;
3000  switch (UseOpc) {
3001  default: break;
3002  case ARM::ADDrr:
3003  case ARM::SUBrr:
3004  if (UseOpc == ARM::SUBrr && Commute)
3005  return false;
3006 
3007  // ADD/SUB are special because they're essentially the same operation, so
3008  // we can handle a larger range of immediates.
3009  if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3010  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3011  else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3012  ImmVal = -ImmVal;
3013  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3014  } else
3015  return false;
3016  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3017  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3018  break;
3019  case ARM::ORRrr:
3020  case ARM::EORrr:
3021  if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3022  return false;
3023  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3024  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3025  switch (UseOpc) {
3026  default: break;
3027  case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3028  case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3029  }
3030  break;
3031  case ARM::t2ADDrr:
3032  case ARM::t2SUBrr:
3033  if (UseOpc == ARM::t2SUBrr && Commute)
3034  return false;
3035 
3036  // ADD/SUB are special because they're essentially the same operation, so
3037  // we can handle a larger range of immediates.
3038  if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3039  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
3040  else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3041  ImmVal = -ImmVal;
3042  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
3043  } else
3044  return false;
3045  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3046  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3047  break;
3048  case ARM::t2ORRrr:
3049  case ARM::t2EORrr:
3050  if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3051  return false;
3052  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3053  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3054  switch (UseOpc) {
3055  default: break;
3056  case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3057  case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3058  }
3059  break;
3060  }
3061  }
3062  }
3063 
3064  unsigned OpIdx = Commute ? 2 : 1;
3065  unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
3066  bool isKill = UseMI.getOperand(OpIdx).isKill();
3067  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
3068  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3069  NewReg)
3070  .addReg(Reg1, getKillRegState(isKill))
3071  .addImm(SOImmValV1)
3072  .add(predOps(ARMCC::AL))
3073  .add(condCodeOp());
3074  UseMI.setDesc(get(NewUseOpc));
3075  UseMI.getOperand(1).setReg(NewReg);
3076  UseMI.getOperand(1).setIsKill();
3077  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3078  DefMI.eraseFromParent();
3079  return true;
3080 }
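
FoldImmediate above depends on the constant splitting into two SO immediates (the ARM_AM::isSOImmTwoPartVal family), so that the MOVi32imm can be deleted and each half folded into its own ADDri/ORRri. A self-contained sketch of that idea, with a simplified isSOImm stand-in and a hand-picked split for one example value, is:

#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t V, unsigned Amt) {
  return Amt == 0 ? V : (V >> Amt) | (V << (32 - Amt));
}

// A value is a single SO immediate if it is an 8-bit value rotated right by
// an even amount; brute-force check over all even rotations.
static bool isSOImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2)
    if ((V & ~rotr32(0xFFu, Rot)) == 0)
      return true;
  return false;
}

int main() {
  uint32_t Imm = 0x00AB00CD;         // not a single SO immediate...
  uint32_t Part1 = Imm & 0x000000FF; // 0x000000CD
  uint32_t Part2 = Imm & ~Part1;     // 0x00AB0000
  std::printf("single SO imm? %d\n", isSOImm(Imm));                // 0
  std::printf("two SO imms?   %d\n", isSOImm(Part1) && isSOImm(Part2)); // 1
  // So:  add r1, r0, #0x00AB0000 ;  add r1, r1, #0x000000CD
}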
3081 
3082 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3083  const MachineInstr &MI) {
3084  switch (MI.getOpcode()) {
3085  default: {
3086  const MCInstrDesc &Desc = MI.getDesc();
3087  int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3088  assert(UOps >= 0 && "bad # UOps");
3089  return UOps;
3090  }
3091 
3092  case ARM::LDRrs:
3093  case ARM::LDRBrs:
3094  case ARM::STRrs:
3095  case ARM::STRBrs: {
3096  unsigned ShOpVal = MI.getOperand(3).getImm();
3097  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3098  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3099  if (!isSub &&
3100  (ShImm == 0 ||
3101  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3102  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3103  return 1;
3104  return 2;
3105  }
3106 
3107  case ARM::LDRH:
3108  case ARM::STRH: {
3109  if (!MI.getOperand(2).getReg())
3110  return 1;
3111 
3112  unsigned ShOpVal = MI.getOperand(3).getImm();
3113  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3114  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3115  if (!isSub &&
3116  (ShImm == 0 ||
3117  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3118  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3119  return 1;
3120  return 2;
3121  }
3122 
3123  case ARM::LDRSB:
3124  case ARM::LDRSH:
3125  return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3126 
3127  case ARM::LDRSB_POST:
3128  case ARM::LDRSH_POST: {
3129  unsigned Rt = MI.getOperand(0).getReg();
3130  unsigned Rm = MI.getOperand(3).getReg();
3131  return (Rt == Rm) ? 4 : 3;
3132  }
3133 
3134  case ARM::LDR_PRE_REG:
3135  case ARM::LDRB_PRE_REG: {
3136  unsigned Rt = MI.getOperand(0).getReg();
3137  unsigned Rm = MI.getOperand(3).getReg();
3138  if (Rt == Rm)
3139  return 3;
3140  unsigned ShOpVal = MI.getOperand(4).getImm();
3141  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3142  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3143  if (!isSub &&
3144  (ShImm == 0 ||
3145  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3146  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3147  return 2;
3148  return 3;
3149  }
3150 
3151  case ARM::STR_PRE_REG:
3152  case ARM::STRB_PRE_REG: {
3153  unsigned ShOpVal = MI.getOperand(4).getImm();
3154  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3155  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3156  if (!isSub &&
3157  (ShImm == 0 ||
3158  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3159  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3160  return 2;
3161  return 3;
3162  }
3163 
3164  case ARM::LDRH_PRE:
3165  case ARM::STRH_PRE: {
3166  unsigned Rt = MI.getOperand(0).getReg();
3167  unsigned Rm = MI.getOperand(3).getReg();
3168  if (!Rm)
3169  return 2;
3170  if (Rt == Rm)
3171  return 3;
3172  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3173  }
3174 
3175  case ARM::LDR_POST_REG:
3176  case ARM::LDRB_POST_REG:
3177  case ARM::LDRH_POST: {
3178  unsigned Rt = MI.getOperand(0).getReg();
3179  unsigned Rm = MI.getOperand(3).getReg();
3180  return (Rt == Rm) ? 3 : 2;
3181  }
3182 
3183  case ARM::LDR_PRE_IMM:
3184  case ARM::LDRB_PRE_IMM:
3185  case ARM::LDR_POST_IMM:
3186  case ARM::LDRB_POST_IMM:
3187  case ARM::STRB_POST_IMM:
3188  case ARM::STRB_POST_REG:
3189  case ARM::STRB_PRE_IMM:
3190  case ARM::STRH_POST:
3191  case ARM::STR_POST_IMM:
3192  case ARM::STR_POST_REG:
3193  case ARM::STR_PRE_IMM:
3194  return 2;
3195 
3196  case ARM::LDRSB_PRE:
3197  case ARM::LDRSH_PRE: {
3198  unsigned Rm = MI.getOperand(3).getReg();
3199  if (Rm == 0)
3200  return 3;
3201  unsigned Rt = MI.getOperand(0).getReg();
3202  if (Rt == Rm)
3203  return 4;
3204  unsigned ShOpVal = MI.getOperand(4).getImm();
3205  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3206  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3207  if (!isSub &&
3208  (ShImm == 0 ||
3209  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3210  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3211  return 3;
3212  return 4;
3213  }
3214 
3215  case ARM::LDRD: {
3216  unsigned Rt = MI.getOperand(0).getReg();
3217  unsigned Rn = MI.getOperand(2).getReg();
3218  unsigned Rm = MI.getOperand(3).getReg();
3219  if (Rm)
3220  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3221  : 3;
3222  return (Rt == Rn) ? 3 : 2;
3223  }
3224 
3225  case ARM::STRD: {
3226  unsigned Rm = MI.getOperand(3).getReg();
3227  if (Rm)
3228  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3229  : 3;
3230  return 2;
3231  }
3232 
3233  case ARM::LDRD_POST:
3234  case ARM::t2LDRD_POST:
3235  return 3;
3236 
3237  case ARM::STRD_POST:
3238  case ARM::t2STRD_POST:
3239  return 4;
3240 
3241  case ARM::LDRD_PRE: {
3242  unsigned Rt = MI.getOperand(0).getReg();
3243  unsigned Rn = MI.getOperand(3).getReg();
3244  unsigned Rm = MI.getOperand(4).getReg();
3245  if (Rm)
3246  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3247  : 4;
3248  return (Rt == Rn) ? 4 : 3;
3249  }
3250 
3251  case ARM::t2LDRD_PRE: {
3252  unsigned Rt = MI.getOperand(0).getReg();
3253  unsigned Rn = MI.getOperand(3).getReg();
3254  return (Rt == Rn) ? 4 : 3;
3255  }
3256 
3257  case ARM::STRD_PRE: {
3258  unsigned Rm = MI.getOperand(4).getReg();
3259  if (Rm)
3260  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3261  : 4;
3262  return 3;
3263  }
3264 
3265  case ARM::t2STRD_PRE:
3266  return 3;
3267 
3268  case ARM::t2LDR_POST:
3269  case ARM::t2LDRB_POST:
3270  case ARM::t2LDRB_PRE:
3271  case ARM::t2LDRSBi12:
3272  case ARM::t2LDRSBi8:
3273  case ARM::t2LDRSBpci:
3274  case ARM::t2LDRSBs:
3275  case ARM::t2LDRH_POST:
3276  case ARM::t2LDRH_PRE:
3277  case ARM::t2LDRSBT:
3278  case ARM::t2LDRSB_POST:
3279  case ARM::t2LDRSB_PRE:
3280  case ARM::t2LDRSH_POST:
3281  case ARM::t2LDRSH_PRE:
3282  case ARM::t2LDRSHi12:
3283  case ARM::t2LDRSHi8:
3284  case ARM::t2LDRSHpci:
3285  case ARM::t2LDRSHs:
3286  return 2;
3287 
3288  case ARM::t2LDRDi8: {
3289  unsigned Rt = MI.getOperand(0).getReg();
3290  unsigned Rn = MI.getOperand(2).getReg();
3291  return (Rt == Rn) ? 3 : 2;
3292  }
3293 
3294  case ARM::t2STRB_POST:
3295  case ARM::t2STRB_PRE:
3296  case ARM::t2STRBs:
3297  case ARM::t2STRDi8:
3298  case ARM::t2STRH_POST:
3299  case ARM::t2STRH_PRE:
3300  case ARM::t2STRHs:
3301  case ARM::t2STR_POST:
3302  case ARM::t2STR_PRE:
3303  case ARM::t2STRs:
3304  return 2;
3305  }
3306 }
3307 
3308 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3309 // can't be easily determined, return 0 (missing MachineMemOperand).
3310 //
3311 // FIXME: The current MachineInstr design does not support relying on machine
3312 // mem operands to determine the width of a memory access. Instead, we expect
3313 // the target to provide this information based on the instruction opcode and
3314 // operands. However, using MachineMemOperand is the best solution now for
3315 // two reasons:
3316 //
3317 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3318 // operands. This is much more dangerous than using the MachineMemOperand
3319 // sizes because CodeGen passes can insert/remove optional machine operands. In
3320 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3321 // postRA passes as well.
3322 //
3323 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3324 // machine model that calls this should handle the unknown (zero size) case.
3325 //
3326 // Long term, we should require a target hook that verifies MachineMemOperand
3327 // sizes during MC lowering. That target hook should be local to MC lowering
3328 // because we can't ensure that it is aware of other MI forms. Doing this will
3329 // ensure that MachineMemOperands are correctly propagated through all passes.
3330 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3331  unsigned Size = 0;
3332  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3333  E = MI.memoperands_end();
3334  I != E; ++I) {
3335  Size += (*I)->getSize();
3336  }
3337  return Size / 4;
3338 }
3339 
3340 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3341  unsigned NumRegs) {
3342  unsigned UOps = 1 + NumRegs; // 1 for address computation.
3343  switch (Opc) {
3344  default:
3345  break;
3346  case ARM::VLDMDIA_UPD:
3347  case ARM::VLDMDDB_UPD:
3348  case ARM::VLDMSIA_UPD:
3349  case ARM::VLDMSDB_UPD:
3350  case ARM::VSTMDIA_UPD:
3351  case ARM::VSTMDDB_UPD:
3352  case ARM::VSTMSIA_UPD:
3353  case ARM::VSTMSDB_UPD:
3354  case ARM::LDMIA_UPD:
3355  case ARM::LDMDA_UPD:
3356  case ARM::LDMDB_UPD:
3357  case ARM::LDMIB_UPD:
3358  case ARM::STMIA_UPD:
3359  case ARM::STMDA_UPD:
3360  case ARM::STMDB_UPD:
3361  case ARM::STMIB_UPD:
3362  case ARM::tLDMIA_UPD:
3363  case ARM::tSTMIA_UPD:
3364  case ARM::t2LDMIA_UPD:
3365  case ARM::t2LDMDB_UPD:
3366  case ARM::t2STMIA_UPD:
3367  case ARM::t2STMDB_UPD:
3368  ++UOps; // One for base register writeback.
3369  break;
3370  case ARM::LDMIA_RET:
3371  case ARM::tPOP_RET:
3372  case ARM::t2LDMIA_RET:
3373  UOps += 2; // One for base reg wb, one for write to pc.
3374  break;
3375  }
3376  return UOps;
3377 }
3378 
3379 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3380  const MachineInstr &MI) const {
3381  if (!ItinData || ItinData->isEmpty())
3382  return 1;
3383 
3384  const MCInstrDesc &Desc = MI.getDesc();
3385  unsigned Class = Desc.getSchedClass();
3386  int ItinUOps = ItinData->getNumMicroOps(Class);
3387  if (ItinUOps >= 0) {
3388  if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3389  return getNumMicroOpsSwiftLdSt(ItinData, MI);
3390 
3391  return ItinUOps;
3392  }
3393 
3394  unsigned Opc = MI.getOpcode();
3395  switch (Opc) {
3396  default:
3397  llvm_unreachable("Unexpected multi-uops instruction!");
3398  case ARM::VLDMQIA:
3399  case ARM::VSTMQIA:
3400  return 2;
3401 
3402  // The number of uOps for load / store multiple is determined by the number
3403  // of registers.
3404  //
3405  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3406  // same cycle. The scheduling for the first load / store must be done
3407  // separately by assuming the address is not 64-bit aligned.
3408  //
3409  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3410  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3411  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3412  case ARM::VLDMDIA:
3413  case ARM::VLDMDIA_UPD:
3414  case ARM::VLDMDDB_UPD:
3415  case ARM::VLDMSIA:
3416  case ARM::VLDMSIA_UPD:
3417  case ARM::VLDMSDB_UPD:
3418  case ARM::VSTMDIA:
3419  case ARM::VSTMDIA_UPD:
3420  case ARM::VSTMDDB_UPD:
3421  case ARM::VSTMSIA:
3422  case ARM::VSTMSIA_UPD:
3423  case ARM::VSTMSDB_UPD: {
3424  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3425  return (NumRegs / 2) + (NumRegs % 2) + 1;
3426  }
3427 
3428  case ARM::LDMIA_RET:
3429  case ARM::LDMIA:
3430  case ARM::LDMDA:
3431  case ARM::LDMDB:
3432  case ARM::LDMIB:
3433  case ARM::LDMIA_UPD:
3434  case ARM::LDMDA_UPD:
3435  case ARM::LDMDB_UPD:
3436  case ARM::LDMIB_UPD:
3437  case ARM::STMIA:
3438  case ARM::STMDA:
3439  case ARM::STMDB:
3440  case ARM::STMIB:
3441  case ARM::STMIA_UPD:
3442  case ARM::STMDA_UPD:
3443  case ARM::STMDB_UPD:
3444  case ARM::STMIB_UPD:
3445  case ARM::tLDMIA:
3446  case ARM::tLDMIA_UPD:
3447  case ARM::tSTMIA_UPD:
3448  case ARM::tPOP_RET:
3449  case ARM::tPOP:
3450  case ARM::tPUSH:
3451  case ARM::t2LDMIA_RET:
3452  case ARM::t2LDMIA:
3453  case ARM::t2LDMDB:
3454  case ARM::t2LDMIA_UPD:
3455  case ARM::t2LDMDB_UPD:
3456  case ARM::t2STMIA:
3457  case ARM::t2STMDB:
3458  case ARM::t2STMIA_UPD:
3459  case ARM::t2STMDB_UPD: {
3460  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3461  switch (Subtarget.getLdStMultipleTiming()) {
3462  case ARMSubtarget::SingleIssuePlusExtras:
3463  return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3464  case ARMSubtarget::SingleIssue:
3465  // Assume the worst.
3466  return NumRegs;
3467  case ARMSubtarget::DoubleIssue: {
3468  if (NumRegs < 4)
3469  return 2;
3470  // 4 registers would be issued: 2, 2.
3471  // 5 registers would be issued: 2, 2, 1.
3472  unsigned UOps = (NumRegs / 2);
3473  if (NumRegs % 2)
3474  ++UOps;
3475  return UOps;
3476  }
3477  case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3478  unsigned UOps = (NumRegs / 2);
3479  // If there is an odd number of registers or if it's not 64-bit aligned,
3480  // then it takes an extra AGU (Address Generation Unit) cycle.
3481  if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3482  (*MI.memoperands_begin())->getAlignment() < 8)
3483  ++UOps;
3484  return UOps;
3485  }
3486  }
3487  }
3488  }
3489  llvm_unreachable("Didn't find the number of microops");
3490 }
3491 
3492 int
3493 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3494  const MCInstrDesc &DefMCID,
3495  unsigned DefClass,
3496  unsigned DefIdx, unsigned DefAlign) const {
3497  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3498  if (RegNo <= 0)
3499  // Def is the address writeback.
3500  return ItinData->getOperandCycle(DefClass, DefIdx);
3501 
3502  int DefCycle;
3503  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3504  // (regno / 2) + (regno % 2) + 1
3505  DefCycle = RegNo / 2 + 1;
3506  if (RegNo % 2)
3507  ++DefCycle;
3508  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3509  DefCycle = RegNo;
3510  bool isSLoad = false;
3511 
3512  switch (DefMCID.getOpcode()) {
3513  default: break;
3514  case ARM::VLDMSIA:
3515  case ARM::VLDMSIA_UPD:
3516  case ARM::VLDMSDB_UPD:
3517  isSLoad = true;
3518  break;
3519  }
3520 
3521  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3522  // then it takes an extra cycle.
3523  if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3524  ++DefCycle;
3525  } else {
3526  // Assume the worst.
3527  DefCycle = RegNo + 2;
3528  }
3529 
3530  return DefCycle;
3531 }
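// Worked example (illustrative): for the third loaded register (RegNo = 3),
// Cortex-A8/A7 gives 3/2 + 1 = 2, plus 1 for the odd count = 3 cycles;
// Cortex-A9/Swift starts from RegNo = 3 and adds 1 only for an odd 'S'
// register count or an unaligned (DefAlign < 8) access.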
3532 
3533 bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
3534  unsigned BaseReg = MI.getOperand(0).getReg();
3535  for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
3536  const auto &Op = MI.getOperand(i);
3537  if (Op.isReg() && Op.getReg() == BaseReg)
3538  return true;
3539  }
3540  return false;
3541 }
3542 unsigned
3543 ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
3544  // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops
3545  // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops)
3546  return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
3547 }
3548 
3549 int
3550 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3551  const MCInstrDesc &DefMCID,
3552  unsigned DefClass,
3553  unsigned DefIdx, unsigned DefAlign) const {
3554  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3555  if (RegNo <= 0)
3556  // Def is the address writeback.
3557  return ItinData->getOperandCycle(DefClass, DefIdx);
3558 
3559  int DefCycle;
3560  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3561  // 4 registers would be issued: 1, 2, 1.
3562  // 5 registers would be issued: 1, 2, 2.
3563  DefCycle = RegNo / 2;
3564  if (DefCycle < 1)
3565  DefCycle = 1;
3566  // Result latency is issue cycle + 2: E2.
3567  DefCycle += 2;
3568  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3569  DefCycle = (RegNo / 2);
3570  // If there is an odd number of registers or if it's not 64-bit aligned,
3571  // then it takes an extra AGU (Address Generation Unit) cycle.
3572  if ((RegNo % 2) || DefAlign < 8)
3573  ++DefCycle;
3574  // Result latency is AGU cycles + 2.
3575  DefCycle += 2;
3576  } else {
3577  // Assume the worst.
3578  DefCycle = RegNo + 2;
3579  }
3580 
3581  return DefCycle;
3582 }
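// Worked example (illustrative): with RegNo = 5, Cortex-A8/A7 yields
// max(5/2, 1) + 2 = 4 cycles, while Cortex-A9/Swift yields 5/2 = 2, plus 1
// for the odd register count, plus 2 for the result latency = 5 cycles.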
3583 
3584 int
3585 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3586  const MCInstrDesc &UseMCID,
3587  unsigned UseClass,
3588  unsigned UseIdx, unsigned UseAlign) const {
3589  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3590  if (RegNo <= 0)
3591  return ItinData->getOperandCycle(UseClass, UseIdx);
3592 
3593  int UseCycle;
3594  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3595  // (regno / 2) + (regno % 2) + 1
3596  UseCycle = RegNo / 2 + 1;
3597  if (RegNo % 2)
3598  ++UseCycle;
3599  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3600  UseCycle = RegNo;
3601  bool isSStore = false;
3602 
3603  switch (UseMCID.getOpcode()) {
3604  default: break;
3605  case ARM::VSTMSIA:
3606  case ARM::VSTMSIA_UPD:
3607  case ARM::VSTMSDB_UPD:
3608  isSStore = true;
3609  break;
3610  }
3611 
3612  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3613  // then it takes an extra cycle.
3614  if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3615  ++UseCycle;
3616  } else {
3617  // Assume the worst.
3618  UseCycle = RegNo + 2;
3619  }
3620 
3621  return UseCycle;
3622 }
3623 
3624 int
3625 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3626  const MCInstrDesc &UseMCID,
3627  unsigned UseClass,
3628  unsigned UseIdx, unsigned UseAlign) const {
3629  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3630  if (RegNo <= 0)
3631  return ItinData->getOperandCycle(UseClass, UseIdx);
3632 
3633  int UseCycle;
3634  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3635  UseCycle = RegNo / 2;
3636  if (UseCycle < 2)
3637  UseCycle = 2;
3638  // Read in E3.
3639  UseCycle += 2;
3640  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3641  UseCycle = (RegNo / 2);
3642  // If there is an odd number of registers or if it's not 64-bit aligned,
3643  // then it takes an extra AGU (Address Generation Unit) cycle.
3644  if ((RegNo % 2) || UseAlign < 8)
3645  ++UseCycle;
3646  } else {
3647  // Assume the worst.
3648  UseCycle = 1;
3649  }
3650  return UseCycle;
3651 }
3652 
3653 int
3654 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3655  const MCInstrDesc &DefMCID,
3656  unsigned DefIdx, unsigned DefAlign,
3657  const MCInstrDesc &UseMCID,
3658  unsigned UseIdx, unsigned UseAlign) const {
3659  unsigned DefClass = DefMCID.getSchedClass();
3660  unsigned UseClass = UseMCID.getSchedClass();
3661 
3662  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3663  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3664 
3665  // This may be a def / use of a variable_ops instruction, in which case the
3666  // operand latency might be determinable dynamically. Let the target try to
3667  // figure it out.
3668  int DefCycle = -1;
3669  bool LdmBypass = false;
3670  switch (DefMCID.getOpcode()) {
3671  default:
3672  DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3673  break;
3674 
3675  case ARM::VLDMDIA:
3676  case ARM::VLDMDIA_UPD:
3677  case ARM::VLDMDDB_UPD:
3678  case ARM::VLDMSIA:
3679  case ARM::VLDMSIA_UPD:
3680  case ARM::VLDMSDB_UPD:
3681  DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3682  break;
3683 
3684  case ARM::LDMIA_RET:
3685  case ARM::LDMIA:
3686  case ARM::LDMDA:
3687  case ARM::LDMDB:
3688  case ARM::LDMIB:
3689  case ARM::LDMIA_UPD:
3690  case ARM::LDMDA_UPD:
3691  case ARM::LDMDB_UPD:
3692  case ARM::LDMIB_UPD:
3693  case ARM::tLDMIA:
3694  case ARM::tLDMIA_UPD:
3695  case ARM::tPUSH:
3696  case ARM::t2LDMIA_RET:
3697  case ARM::t2LDMIA:
3698  case ARM::t2LDMDB:
3699  case ARM::t2LDMIA_UPD:
3700  case ARM::t2LDMDB_UPD:
3701  LdmBypass = true;
3702  DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3703  break;
3704  }
3705 
3706  if (DefCycle == -1)
3707  // We can't seem to determine the result latency of the def, so assume it's 2.
3708  DefCycle = 2;
3709 
3710  int UseCycle = -1;
3711  switch (UseMCID.getOpcode()) {
3712  default:
3713  UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3714  break;
3715 
3716  case ARM::VSTMDIA:
3717  case ARM::VSTMDIA_UPD:
3718  case ARM::VSTMDDB_UPD:
3719  case ARM::VSTMSIA:
3720  case ARM::VSTMSIA_UPD:
3721  case ARM::VSTMSDB_UPD:
3722  UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3723  break;
3724 
3725  case ARM::STMIA:
3726  case ARM::STMDA:
3727  case ARM::STMDB:
3728  case ARM::STMIB:
3729  case ARM::STMIA_UPD:
3730  case ARM::STMDA_UPD:
3731  case ARM::STMDB_UPD:
3732  case ARM::STMIB_UPD:
3733  case ARM::tSTMIA_UPD:
3734  case ARM::tPOP_RET:
3735  case ARM::tPOP:
3736  case ARM::t2STMIA:
3737  case ARM::t2STMDB:
3738  case ARM::t2STMIA_UPD:
3739  case ARM::t2STMDB_UPD:
3740  UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3741  break;
3742  }
3743 
3744  if (UseCycle == -1)
3745  // Assume it's read in the first stage.
3746  UseCycle = 1;
3747 
3748  UseCycle = DefCycle - UseCycle + 1;
3749  if (UseCycle > 0) {
3750  if (LdmBypass) {
3751  // It's a variable_ops instruction, so we can't use DefIdx here. Just use
3752  // the first def operand.
3753  if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3754  UseClass, UseIdx))
3755  --UseCycle;
3756  } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3757  UseClass, UseIdx)) {
3758  --UseCycle;
3759  }
3760  }
3761 
3762  return UseCycle;
3763 }
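// Rough example (illustrative): if the LDM def above resolves to DefCycle = 4
// and the use reads its operand in stage UseCycle = 1, the returned operand
// latency is 4 - 1 + 1 = 4; a bypass modeled by hasPipelineForwarding shaves
// one more cycle off.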
3764 
3765 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3766  const MachineInstr *MI, unsigned Reg,
3767  unsigned &DefIdx, unsigned &Dist) {
3768  Dist = 0;
3769 
3770  MachineBasicBlock::const_iterator I = MI; ++I;
3771  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
3772  assert(II->isInsideBundle() && "Empty bundle?");
3773 
3774  int Idx = -1;
3775  while (II->isInsideBundle()) {
3776  Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3777  if (Idx != -1)
3778  break;
3779  --II;
3780  ++Dist;
3781  }
3782 
3783  assert(Idx != -1 && "Cannot find bundled definition!");
3784  DefIdx = Idx;
3785  return &*II;
3786 }
3787 
3788 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3789  const MachineInstr &MI, unsigned Reg,
3790  unsigned &UseIdx, unsigned &Dist) {
3791  Dist = 0;
3792 
3793  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
3794  assert(II->isInsideBundle() && "Empty bundle?");
3795  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
3796 
3797  // FIXME: This doesn't properly handle multiple uses.
3798  int Idx = -1;
3799  while (II != E && II->isInsideBundle()) {
3800  Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3801  if (Idx != -1)
3802  break;
3803  if (II->getOpcode() != ARM::t2IT)
3804  ++Dist;
3805  ++II;
3806  }
3807 
3808  if (Idx == -1) {
3809  Dist = 0;
3810  return nullptr;
3811  }
3812 
3813  UseIdx = Idx;
3814  return &*II;
3815 }
3816 
3817 /// Return the number of cycles to add to (or subtract from) the static
3818 /// itinerary based on the def opcode and alignment. The caller will ensure that
3819 /// adjusted latency is at least one cycle.
3820 static int adjustDefLatency(const ARMSubtarget &Subtarget,
3821  const MachineInstr &DefMI,
3822  const MCInstrDesc &DefMCID, unsigned DefAlign) {
3823  int Adjust = 0;
3824  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
3825  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3826  // variants are one cycle cheaper.
3827  switch (DefMCID.getOpcode()) {
3828  default: break;
3829  case ARM::LDRrs:
3830  case ARM::LDRBrs: {
3831  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3832  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3833  if (ShImm == 0 ||
3834  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3835  --Adjust;
3836  break;
3837  }
3838  case ARM::t2LDRs:
3839  case ARM::t2LDRBs:
3840  case ARM::t2LDRHs:
3841  case ARM::t2LDRSHs: {
3842  // Thumb2 mode: lsl only.
3843  unsigned ShAmt = DefMI.getOperand(3).getImm();
3844  if (ShAmt == 0 || ShAmt == 2)
3845  --Adjust;
3846  break;
3847  }
3848  }
3849  } else if (Subtarget.isSwift()) {
3850  // FIXME: Properly handle all of the latency adjustments for address
3851  // writeback.
3852  switch (DefMCID.getOpcode()) {
3853  default: break;
3854  case ARM::LDRrs:
3855  case ARM::LDRBrs: {
3856  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3857  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3858  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3859  if (!isSub &&
3860  (ShImm == 0 ||
3861  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3862  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3863  Adjust -= 2;
3864  else if (!isSub &&
3865  ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3866  --Adjust;
3867  break;
3868  }
3869  case ARM::t2LDRs:
3870  case ARM::t2LDRBs:
3871  case ARM::t2LDRHs:
3872  case ARM::t2LDRSHs: {
3873  // Thumb2 mode: lsl only.
3874  unsigned ShAmt = DefMI.getOperand(3).getImm();
3875  if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
3876  Adjust -= 2;
3877  break;
3878  }
3879  }
3880  }
3881 
3882  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
3883  switch (DefMCID.getOpcode()) {
3884  default: break;
3885  case ARM::VLD1q8:
3886  case ARM::VLD1q16:
3887  case ARM::VLD1q32:
3888  case ARM::VLD1q64:
3889  case ARM::VLD1q8wb_fixed:
3890  case ARM::VLD1q16wb_fixed:
3891  case ARM::VLD1q32wb_fixed:
3892  case ARM::VLD1q64wb_fixed:
3893  case ARM::VLD1q8wb_register:
3894  case ARM::VLD1q16wb_register:
3895  case ARM::VLD1q32wb_register:
3896  case ARM::VLD1q64wb_register:
3897  case ARM::VLD2d8:
3898  case ARM::VLD2d16:
3899  case ARM::VLD2d32:
3900  case ARM::VLD2q8:
3901  case ARM::VLD2q16:
3902  case ARM::VLD2q32:
3903  case ARM::VLD2d8wb_fixed:
3904  case ARM::VLD2d16wb_fixed:
3905  case ARM::VLD2d32wb_fixed:
3906  case ARM::VLD2q8wb_fixed:
3907  case ARM::VLD2q16wb_fixed:
3908  case ARM::VLD2q32wb_fixed:
3909  case ARM::VLD2d8wb_register:
3910  case ARM::VLD2d16wb_register:
3911  case ARM::VLD2d32wb_register:
3912  case ARM::VLD2q8wb_register:
3913  case ARM::VLD2q16wb_register:
3914  case ARM::VLD2q32wb_register:
3915  case ARM::VLD3d8:
3916  case ARM::VLD3d16:
3917  case ARM::VLD3d32:
3918  case ARM::VLD1d64T:
3919  case ARM::VLD3d8_UPD:
3920  case ARM::VLD3d16_UPD:
3921  case ARM::VLD3d32_UPD:
3922  case ARM::VLD1d64Twb_fixed:
3923  case ARM::VLD1d64Twb_register:
3924  case ARM::VLD3q8_UPD:
3925  case ARM::VLD3q16_UPD:
3926  case ARM::VLD3q32_UPD:
3927  case ARM::VLD4d8:
3928  case ARM::VLD4d16:
3929  case ARM::VLD4d32:
3930  case ARM::VLD1d64Q:
3931  case ARM::VLD4d8_UPD:
3932  case ARM::VLD4d16_UPD:
3933  case ARM::VLD4d32_UPD:
3934  case ARM::VLD1d64Qwb_fixed:
3935  case ARM::VLD1d64Qwb_register:
3936  case ARM::VLD4q8_UPD:
3937  case ARM::VLD4q16_UPD:
3938  case ARM::VLD4q32_UPD:
3939  case ARM::VLD1DUPq8:
3940  case ARM::VLD1DUPq16:
3941  case ARM::VLD1DUPq32:
3942  case ARM::VLD1DUPq8wb_fixed:
3943  case ARM::VLD1DUPq16wb_fixed:
3944  case ARM::VLD1DUPq32wb_fixed:
3945  case ARM::VLD1DUPq8wb_register:
3946  case ARM::VLD1DUPq16wb_register:
3947  case ARM::VLD1DUPq32wb_register:
3948  case ARM::VLD2DUPd8:
3949  case ARM::VLD2DUPd16:
3950  case ARM::VLD2DUPd32:
3951  case ARM::VLD2DUPd8wb_fixed:
3952  case ARM::VLD2DUPd16wb_fixed:
3953  case ARM::VLD2DUPd32wb_fixed:
3954  case ARM::VLD2DUPd8wb_register:
3955  case ARM::VLD2DUPd16wb_register:
3956  case ARM::VLD2DUPd32wb_register:
3957  case ARM::VLD4DUPd8:
3958  case ARM::VLD4DUPd16:
3959  case ARM::VLD4DUPd32:
3960  case ARM::VLD4DUPd8_UPD:
3961  case ARM::VLD4DUPd16_UPD:
3962  case ARM::VLD4DUPd32_UPD:
3963  case ARM::VLD1LNd8:
3964  case ARM::VLD1LNd16:
3965  case ARM::VLD1LNd32:
3966  case ARM::VLD1LNd8_UPD:
3967  case ARM::VLD1LNd16_UPD:
3968  case ARM::VLD1LNd32_UPD:
3969  case ARM::VLD2LNd8:
3970  case ARM::VLD2LNd16:
3971  case ARM::VLD2LNd32:
3972  case ARM::VLD2LNq16:
3973  case ARM::VLD2LNq32:
3974  case ARM::VLD2LNd8_UPD:
3975  case ARM::VLD2LNd16_UPD:
3976  case ARM::VLD2LNd32_UPD:
3977  case ARM::VLD2LNq16_UPD:
3978  case ARM::VLD2LNq32_UPD:
3979  case ARM::VLD4LNd8:
3980  case ARM::VLD4LNd16:
3981  case ARM::VLD4LNd32:
3982  case ARM::VLD4LNq16:
3983  case ARM::VLD4LNq32:
3984  case ARM::VLD4LNd8_UPD:
3985  case ARM::VLD4LNd16_UPD:
3986  case ARM::VLD4LNd32_UPD:
3987  case ARM::VLD4LNq16_UPD:
3988  case ARM::VLD4LNq32_UPD:
3989  // If the address is not 64-bit aligned, the latencies of these
3990  // instructions increase by one.
3991  ++Adjust;
3992  break;
3993  }
3994  }
3995  return Adjust;
3996 }
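// Examples of the adjustments above (illustrative): on Cortex-A8/A9/A7 an
// LDRrs with no shift or an lsl #2 gets -1; on Swift a non-subtracting LDRrs
// with lsl #0..#3 gets -2; a VLD1q8 whose memoperand alignment is below 8
// bytes gets +1 on cores that check VLDn access alignment.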
3997 
3998 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3999  const MachineInstr &DefMI,
4000  unsigned DefIdx,
4001  const MachineInstr &UseMI,
4002  unsigned UseIdx) const {
4003  // No operand latency. The caller may fall back to getInstrLatency.
4004  if (!ItinData || ItinData->isEmpty())
4005  return -1;
4006 
4007  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4008  unsigned Reg = DefMO.getReg();
4009 
4010  const MachineInstr *ResolvedDefMI = &DefMI;
4011  unsigned DefAdj = 0;
4012  if (DefMI.isBundle())
4013  ResolvedDefMI =
4014  getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4015  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4016  ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4017  return 1;
4018  }
4019 
4020  const MachineInstr *ResolvedUseMI = &UseMI;
4021  unsigned UseAdj = 0;
4022  if (UseMI.isBundle()) {
4023  ResolvedUseMI =
4024  getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4025  if (!ResolvedUseMI)
4026  return -1;
4027  }
4028 
4029  return getOperandLatencyImpl(
4030  ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4031  Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4032 }
4033 
4034 int ARMBaseInstrInfo::getOperandLatencyImpl(
4035  const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4036  unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4037  const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4038  unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4039  if (Reg == ARM::CPSR) {
4040  if (DefMI.getOpcode() == ARM::FMSTAT) {
4041  // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4042  return Subtarget.isLikeA9() ? 1 : 20;
4043  }
4044 
4045  // CPSR set and branch can be paired in the same cycle.
4046  if (UseMI.isBranch())
4047  return 0;
4048 
4049  // Otherwise it takes the instruction latency (generally one).
4050  unsigned Latency = getInstrLatency(ItinData, DefMI);
4051 
4052  // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4053  // its uses. Instructions which are otherwise scheduled between them may
4054  // incur a code size penalty (not able to use the CPSR setting 16-bit
4055  // instructions).
4056  if (Latency > 0 && Subtarget.isThumb2()) {
4057  const MachineFunction *MF = DefMI.getParent()->getParent();
4058  // FIXME: Use Function::optForSize().
4059  if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4060  --Latency;
4061  }
4062  return Latency;
4063  }
4064 
4065  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4066  return -1;
4067 
4068  unsigned DefAlign = DefMI.hasOneMemOperand()
4069  ? (*DefMI.memoperands_begin())->getAlignment()
4070  : 0;
4071  unsigned UseAlign = UseMI.hasOneMemOperand()
4072  ? (*UseMI.memoperands_begin())->getAlignment()
4073  : 0;
4074 
4075  // Get the itinerary's latency if possible, and handle variable_ops.
4076  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
4077  UseIdx, UseAlign);
4078  // Unable to find operand latency. The caller may resort to getInstrLatency.
4079  if (Latency < 0)
4080  return Latency;
4081 
4082  // Adjust for IT block position.
4083  int Adj = DefAdj + UseAdj;
4084 
4085  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4086  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4087  if (Adj >= 0 || (int)Latency > -Adj) {
4088  return Latency + Adj;
4089  }
4090  // Return the itinerary latency, which may be zero but not less than zero.
4091  return Latency;
4092 }
4093 
4094 int
4095 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4096  SDNode *DefNode, unsigned DefIdx,
4097  SDNode *UseNode, unsigned UseIdx) const {
4098  if (!DefNode->isMachineOpcode())
4099  return 1;
4100 
4101  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4102 
4103  if (isZeroCost(DefMCID.Opcode))
4104  return 0;
4105 
4106  if (!ItinData || ItinData->isEmpty())
4107  return DefMCID.mayLoad() ? 3 : 1;
4108 
4109  if (!UseNode->isMachineOpcode()) {
4110  int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4111  int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4112  int Threshold = 1 + Adj;
4113  return Latency <= Threshold ? 1 : Latency - Adj;
4114  }
4115 
4116  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4117  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
4118  unsigned DefAlign = !DefMN->memoperands_empty()
4119  ? (*DefMN->memoperands_begin())->getAlignment() : 0;
4120  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
4121  unsigned UseAlign = !UseMN->memoperands_empty()
4122  ? (*UseMN->memoperands_begin())->getAlignment() : 0;
4123  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4124  UseMCID, UseIdx, UseAlign);
4125 
4126  if (Latency > 1 &&
4127  (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4128  Subtarget.isCortexA7())) {
4129  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4130  // variants are one cycle cheaper.
4131  switch (DefMCID.getOpcode()) {
4132  default: break;
4133  case ARM::LDRrs:
4134  case ARM::LDRBrs: {
4135  unsigned ShOpVal =
4136  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4137  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4138  if (ShImm == 0 ||
4139  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4140  --Latency;
4141  break;
4142  }
4143  case ARM::t2LDRs:
4144  case ARM::t2LDRBs:
4145  case ARM::t2LDRHs:
4146  case ARM::t2LDRSHs: {
4147  // Thumb2 mode: lsl only.
4148  unsigned ShAmt =
4149  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4150  if (ShAmt == 0 || ShAmt == 2)
4151  --Latency;
4152  break;
4153  }
4154  }
4155  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4156  // FIXME: Properly handle all of the latency adjustments for address
4157  // writeback.
4158  switch (DefMCID.getOpcode()) {
4159  default: break;
4160  case ARM::LDRrs:
4161  case ARM::LDRBrs: {
4162  unsigned ShOpVal =
4163  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4164  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4165  if (ShImm == 0 ||
4166  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4167  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4168  Latency -= 2;
4169  else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4170  --Latency;
4171  break;
4172  }
4173  case ARM::t2LDRs:
4174  case ARM::t2LDRBs:
4175  case ARM::t2LDRHs:
4176  case ARM::t2LDRSHs:
4177  // Thumb2 mode: lsl 0-3 only.
4178  Latency -= 2;
4179  break;
4180  }
4181  }
4182 
4183  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4184  switch (DefMCID.getOpcode()) {
4185  default: break;
4186  case ARM::VLD1q8:
4187  case ARM::VLD1q16:
4188  case ARM::VLD1q32:
4189  case ARM::VLD1q64:
4190  case ARM::VLD1q8wb_register:
4191  case ARM::VLD1q16wb_register:
4192  case ARM::VLD1q32wb_register:
4193  case ARM::VLD1q64wb_register:
4194  case ARM::VLD1q8wb_fixed:
4195  case ARM::VLD1q16wb_fixed:
4196  case ARM::VLD1q32wb_fixed:
4197  case ARM::VLD1q64wb_fixed:
4198  case ARM::VLD2d8:
4199  case ARM::VLD2d16:
4200  case ARM::VLD2d32:
4201  case ARM::VLD2q8Pseudo:
4202  case ARM::VLD2q16Pseudo:
4203  case ARM::VLD2q32Pseudo:
4204  case ARM::VLD2d8wb_fixed:
4205  case ARM::VLD2d16wb_fixed:
4206  case ARM::VLD2d32wb_fixed:
4207  case ARM::VLD2q8PseudoWB_fixed:
4208  case ARM::VLD2q16PseudoWB_fixed:
4209  case ARM::VLD2q32PseudoWB_fixed:
4210  case ARM::VLD2d8wb_register:
4211  case ARM::VLD2d16wb_register:
4212  case ARM::VLD2d32wb_register:
4213  case ARM::VLD2q8PseudoWB_register:
4214  case ARM::VLD2q16PseudoWB_register:
4215  case ARM::VLD2q32PseudoWB_register:
4216  case ARM::VLD3d8Pseudo:
4217  case ARM::VLD3d16Pseudo:
4218  case ARM::VLD3d32Pseudo:
4219  case ARM::VLD1d64TPseudo:
4220  case ARM::VLD1d64TPseudoWB_fixed:
4221  case ARM::VLD3d8Pseudo_UPD:
4222  case ARM::VLD3d16Pseudo_UPD:
4223  case ARM::VLD3d32Pseudo_UPD:
4224  case ARM::VLD3q8Pseudo_UPD:
4225  case ARM::VLD3q16Pseudo_UPD:
4226  case ARM::VLD3q32Pseudo_UPD:
4227  case ARM::VLD3q8oddPseudo:
4228  case ARM::VLD3q16oddPseudo:
4229  case ARM::VLD3q32oddPseudo:
4230  case ARM::VLD3q8oddPseudo_UPD:
4231  case ARM::VLD3q16oddPseudo_UPD:
4232  case ARM::VLD3q32oddPseudo_UPD:
4233  case ARM::VLD4d8Pseudo:
4234  case ARM::VLD4d16Pseudo:
4235  case ARM::VLD4d32Pseudo:
4236  case ARM::VLD1d64QPseudo:
4237  case ARM::VLD1d64QPseudoWB_fixed:
4238  case ARM::VLD4d8Pseudo_UPD:
4239  case ARM::VLD4d16Pseudo_UPD:
4240  case ARM::VLD4d32Pseudo_UPD:
4241  case ARM::VLD4q8Pseudo_UPD:
4242  case ARM::VLD4q16Pseudo_UPD:
4243  case ARM::VLD4q32Pseudo_UPD:
4244  case ARM::VLD4q8oddPseudo:
4245  case ARM::VLD4q16oddPseudo:
4246  case ARM::VLD4q32oddPseudo:
4247  case ARM::VLD4q8oddPseudo_UPD:
4248  case ARM::VLD4q16oddPseudo_UPD:
4249  case ARM::VLD4q32oddPseudo_UPD:
4250  case ARM::VLD1DUPq8:
4251  case ARM::VLD1DUPq16:
4252  case ARM::VLD1DUPq32:
4253  case ARM::VLD1DUPq8wb_fixed:
4254  case ARM::VLD1DUPq16wb_fixed:
4255  case ARM::VLD1DUPq32wb_fixed:
4256  case ARM::VLD1DUPq8wb_register:
4257  case ARM::VLD1DUPq16wb_register:
4258  case ARM::VLD1DUPq32wb_register:
4259  case ARM::VLD2DUPd8:
4260  case ARM::VLD2DUPd16:
4261  case ARM::VLD2DUPd32:
4262  case ARM::VLD2DUPd8wb_fixed:
4263  case ARM::VLD2DUPd16wb_fixed:
4264  case ARM::VLD2DUPd32wb_fixed:
4265  case ARM::VLD2DUPd8wb_register:
4266  case ARM::VLD2DUPd16wb_register:
4267  case ARM::VLD2DUPd32wb_register:
4268  case ARM::VLD4DUPd8Pseudo:
4269  case ARM::VLD4DUPd16Pseudo:
4270  case ARM::VLD4DUPd32Pseudo:
4271  case ARM::VLD4DUPd8Pseudo_UPD:
4272  case ARM::VLD4DUPd16Pseudo_UPD:
4273  case ARM::VLD4DUPd32Pseudo_UPD:
4274  case ARM::VLD1LNq8Pseudo:
4275  case ARM::VLD1LNq16Pseudo:
4276  case ARM::VLD1LNq32Pseudo:
4277  case ARM::VLD1LNq8Pseudo_UPD:
4278  case ARM::VLD1LNq16Pseudo_UPD:
4279  case ARM::VLD1LNq32Pseudo_UPD:
4280  case ARM::VLD2LNd8Pseudo:
4281  case ARM::VLD2LNd16Pseudo:
4282  case ARM::VLD2LNd32Pseudo:
4283  case ARM::VLD2LNq16Pseudo:
4284  case ARM::VLD2LNq32Pseudo:
4285  case ARM::VLD2LNd8Pseudo_UPD:
4286  case ARM::VLD2LNd16Pseudo_UPD:
4287  case ARM::VLD2LNd32Pseudo_UPD:
4288  case ARM::VLD2LNq16Pseudo_UPD:
4289  case ARM::VLD2LNq32Pseudo_UPD:
4290  case ARM::VLD4LNd8Pseudo:
4291  case ARM::VLD4LNd16Pseudo:
4292  case ARM::VLD4LNd32Pseudo:
4293  case ARM::VLD4LNq16Pseudo:
4294  case ARM::VLD4LNq32Pseudo:
4295  case ARM::VLD4LNd8Pseudo_UPD:
4296  case ARM::VLD4LNd16Pseudo_UPD:
4297  case ARM::VLD4LNd32Pseudo_UPD:
4298  case ARM::VLD4LNq16Pseudo_UPD:
4299  case ARM::VLD4LNq32Pseudo_UPD:
4300  // If the address is not 64-bit aligned, the latencies of these
4301  // instructions increase by one.
4302  ++Latency;
4303  break;
4304  }
4305 
4306  return Latency;
4307 }
4308 
4309 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4310  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4311  MI.isImplicitDef())
4312  return 0;
4313 
4314  if (MI.isBundle())
4315  return 0;
4316 
4317  const MCInstrDesc &MCID = MI.getDesc();
4318 
4319  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4320  !Subtarget.cheapPredicableCPSRDef())) {
4321  // When predicated, CPSR is an additional source operand for CPSR updating
4322  // instructions; this apparently increases their latencies.
4323  return 1;
4324  }
4325  return 0;
4326 }
4327 
4328 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4329  const MachineInstr &MI,
4330  unsigned *PredCost) const {
4331  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4332  MI.isImplicitDef())
4333  return 1;
4334 
4335  // An instruction scheduler typically runs on unbundled instructions; however,
4336  // other passes may query the latency of a bundled instruction.
4337  if (MI.isBundle()) {
4338  unsigned Latency = 0;
4339  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4340  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4341  while (++I != E && I->isInsideBundle()) {
4342  if (I->getOpcode() != ARM::t2IT)
4343  Latency += getInstrLatency(ItinData, *I, PredCost);
4344  }
4345  return Latency;
4346  }
4347 
4348  const MCInstrDesc &MCID = MI.getDesc();
4349  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4350  !Subtarget.cheapPredicableCPSRDef()))) {
4351  // When predicated, CPSR is an additional source operand for CPSR updating
4352  // instructions; this apparently increases their latencies.
4353  *PredCost = 1;
4354  }
4355  // Be sure to call getStageLatency for an empty itinerary in case it has a
4356  // valid MinLatency property.
4357  if (!ItinData)
4358  return MI.mayLoad() ? 3 : 1;
4359 
4360  unsigned Class = MCID.getSchedClass();
4361 
4362  // For instructions with variable uops, use uops as latency.
4363  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4364  return getNumMicroOps(ItinData, MI);
4365 
4366  // For the common case, fall back on the itinerary's latency.
4367  unsigned Latency = ItinData->getStageLatency(Class);
4368 
4369  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4370  unsigned DefAlign =
4371  MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
4372  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4373  if (Adj >= 0 || (int)Latency > -Adj) {
4374  return Latency + Adj;
4375  }
4376  return Latency;
4377 }
4378 
4379 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4380  SDNode *Node) const {
4381  if (!Node->isMachineOpcode())
4382  return 1;
4383 
4384  if (!ItinData || ItinData->isEmpty())
4385  return 1;
4386 
4387  unsigned Opcode = Node->getMachineOpcode();
4388  switch (Opcode) {
4389  default:
4390  return ItinData->getStageLatency(get(Opcode).getSchedClass());
4391  case ARM::VLDMQIA:
4392  case ARM::VSTMQIA:
4393  return 2;
4394  }
4395 }
4396 
4397 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4398  const MachineRegisterInfo *MRI,
4399  const MachineInstr &DefMI,
4400  unsigned DefIdx,
4401  const MachineInstr &UseMI,
4402  unsigned UseIdx) const {
4403  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4404  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4405  if (Subtarget.nonpipelinedVFP() &&
4406  (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4407  return true;
4408 
4409  // Hoist VFP / NEON instructions with 4 or higher latency.
4410  unsigned Latency =
4411  SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4412  if (Latency <= 3)
4413  return false;
4414  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4415  UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4416 }
4417 
4418 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4419  const MachineInstr &DefMI,
4420  unsigned DefIdx) const {
4421  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4422  if (!ItinData || ItinData->isEmpty())
4423  return false;
4424 
4425  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4426  if (DDomain == ARMII::DomainGeneral) {
4427  unsigned DefClass = DefMI.getDesc().getSchedClass();
4428  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4429  return (DefCycle != -1 && DefCycle <= 2);
4430  }
4431  return false;
4432 }
4433 
4434 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4435  StringRef &ErrInfo) const {
4436  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4437  ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4438  return false;
4439  }
4440  return true;
4441 }
4442 
4443 // LoadStackGuard has so far only been implemented for MachO. Different code
4444 // sequence is needed for other targets.
4445 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4446  unsigned LoadImmOpc,
4447  unsigned LoadOpc) const {
4448  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4449  "ROPI/RWPI not currently supported with stack guard");
4450 
4451  MachineBasicBlock &MBB = *MI->getParent();
4452  DebugLoc DL = MI->getDebugLoc();
4453  unsigned Reg = MI->getOperand(0).getReg();
4454  const GlobalValue *GV =
4455  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4456  MachineInstrBuilder MIB;
4457 
4458  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4459  .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
4460 
4461  if (Subtarget.isGVIndirectSymbol(GV)) {
4462  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4463  MIB.addReg(Reg, RegState::Kill).addImm(0);
4464  auto Flags = MachineMemOperand::MOLoad |
4465  MachineMemOperand::MODereferenceable |
4466  MachineMemOperand::MOInvariant;
4467  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4468  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
4469  MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4470  }
4471 
4472  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4473  MIB.addReg(Reg, RegState::Kill)
4474  .addImm(0)
4475  .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
4476  .add(predOps(ARMCC::AL));
4477 }
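// Illustrative expansion (the concrete opcodes are assumptions; the caller
// picks LoadImmOpc/LoadOpc): for an indirect MachO symbol the code above
// emits roughly
//   rD = MOV_ga_pcrel @__stack_chk_guard   ; LoadImmOpc
//   rD = LDRi12 [rD]                       ; GOT load, indirect symbols only
//   rD = LDRi12 [rD]                       ; load the guard value itself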
4478 
4479 bool
4480 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4481  unsigned &AddSubOpc,
4482  bool &NegAcc, bool &HasLane) const {
4483  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4484  if (I == MLxEntryMap.end())
4485  return false;
4486 
4487  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4488  MulOpc = Entry.MulOpc;
4489  AddSubOpc = Entry.AddSubOpc;
4490  NegAcc = Entry.NegAcc;
4491  HasLane = Entry.HasLane;
4492  return true;
4493 }
4494 
4495 //===----------------------------------------------------------------------===//
4496 // Execution domains.
4497 //===----------------------------------------------------------------------===//
4498 //
4499 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4500 // and some can go down both. The vmov instructions go down the VFP pipeline,
4501 // but they can be changed to vorr equivalents that are executed by the NEON
4502 // pipeline.
4503 //
4504 // We use the following execution domain numbering:
4505 //
4506 enum ARMExeDomain {
4507  ExeGeneric = 0,
4508  ExeVFP = 1,
4509  ExeNEON = 2
4510 };
4511 
4512 //
4513 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4514 //
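// Example (illustrative): an unpredicated "d0 = VMOVD d1" reports
// (ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)), i.e. it currently executes in the
// VFP domain but may be swizzled; setExecutionDomain(MI, ExeNEON) then
// rewrites it to "d0 = VORRd d1, d1".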
4515 std::pair<uint16_t, uint16_t>
4516 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4517  // If we don't have access to NEON instructions then we won't be able
4518  // to swizzle anything to the NEON domain. Check to make sure.
4519  if (Subtarget.hasNEON()) {
4520  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4521  // if they are not predicated.
4522  if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4523  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4524 
4525  // CortexA9 is particularly picky about mixing the two and wants these
4526  // converted.
4527  if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4528  (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4529  MI.getOpcode() == ARM::VMOVS))
4530  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4531  }
4532  // No other instructions can be swizzled, so just determine their domain.
4533  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4534 
4535  if (Domain & ARMII::DomainNEON)
4536  return std::make_pair(ExeNEON, 0);
4537 
4538  // Certain instructions can go either way on Cortex-A8.
4539  // Treat them as NEON instructions.
4540  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4541  return std::make_pair(ExeNEON, 0);
4542 
4543  if (Domain & ARMII::DomainVFP)
4544  return std::make_pair(ExeVFP, 0);
4545 
4546  return std::make_pair(ExeGeneric, 0);
4547 }
4548 
4549 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4550  unsigned SReg, unsigned &Lane) {
4551  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4552  Lane = 0;
4553 
4554  if (DReg != ARM::NoRegister)
4555  return DReg;
4556 
4557  Lane = 1;
4558  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4559 
4560  assert(DReg && "S-register with no D super-register?");
4561  return DReg;
4562 }
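// Example (illustrative): S0 maps to D0 with Lane = 0, S1 to D0 with Lane = 1,
// and S5 to D2 with Lane = 1, since D-register k contains S(2k) and S(2k+1).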
4563 
4564 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4565 /// set ImplicitSReg to a register number that must be marked as implicit-use or
4566 /// zero if no register needs to be defined as implicit-use.
4567 ///
4568 /// If the function cannot determine if an SPR should be marked implicit use or
4569 /// not, it returns false.
4570 ///
4571 /// This function handles cases where an instruction is being modified from taking
4572 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4573 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4574 /// lane of the DPR).
4575 ///
4576 /// If the other SPR is defined, an implicit-use of it should be added. Otherwise
4577 /// (including the case where the DPR itself is defined), it should not.
4578 ///
4579 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4580  MachineInstr &MI, unsigned DReg,
4581  unsigned Lane, unsigned &ImplicitSReg) {
4582  // If the DPR is defined or used already, the other SPR lane will be chained
4583  // correctly, so there is nothing to be done.
4584  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4585  ImplicitSReg = 0;
4586  return true;
4587  }
4588 
4589  // Otherwise we need to go searching to see if the SPR is set explicitly.
4590  ImplicitSReg = TRI->getSubReg(DReg,
4591  (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4592  MachineBasicBlock::LivenessQueryResult LQR =
4593  MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4594 
4595  if (LQR == MachineBasicBlock::LQR_Live)
4596  return true;
4597  else if (LQR == MachineBasicBlock::LQR_Unknown)
4598  return false;
4599 
4600  // If the register is known not to be live, there is no need to add an
4601  // implicit-use.
4602  ImplicitSReg = 0;
4603  return true;
4604 }
4605 
4606 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4607  unsigned Domain) const {
4608  unsigned DstReg, SrcReg, DReg;
4609  unsigned Lane;
4610  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4611  const TargetRegisterInfo *TRI = &getRegisterInfo();
4612  switch (MI.getOpcode()) {
4613  default:
4614  llvm_unreachable("cannot handle opcode!");
4615  break;
4616  case ARM::VMOVD:
4617  if (Domain != ExeNEON)
4618  break;
4619 
4620  // Zap the predicate operands.
4621  assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4622 
4623  // Make sure we've got NEON instructions.
4624  assert(Subtarget.hasNEON() && "VORRd requires NEON");
4625 
4626  // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4627  DstReg = MI.getOperand(0).getReg();
4628  SrcReg = MI.getOperand(1).getReg();
4629 
4630  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4631  MI.RemoveOperand(i - 1);
4632 
4633  // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4634  MI.setDesc(get(ARM::VORRd));
4635  MIB.addReg(DstReg, RegState::Define)
4636  .addReg(SrcReg)
4637  .addReg(SrcReg)
4638  .add(predOps(ARMCC::AL));
4639  break;
4640  case ARM::VMOVRS:
4641  if (Domain != ExeNEON)
4642  break;
4643  assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
4644 
4645  // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
4646  DstReg = MI.getOperand(0).getReg();
4647  SrcReg = MI.getOperand(1).getReg();
4648 
4649  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4650  MI.RemoveOperand(i - 1);
4651 
4652  DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
4653 
4654  // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
4655  // Note that DSrc has been widened and the other lane may be undef, which
4656  // contaminates the entire register.
4657  MI.setDesc(get(ARM::VGETLNi32));
4658  MIB.addReg(DstReg, RegState::Define)
4659  .addReg(DReg, RegState::Undef)
4660  .addImm(Lane)
4661  .add(predOps(ARMCC::AL));
4662 
4663  // The old source should be an implicit use, otherwise we might think it
4664  // was dead before here.
4665  MIB.addReg(SrcReg, RegState::Implicit);
4666  break;
4667  case ARM::VMOVSR: {
4668  if (Domain != ExeNEON)
4669  break;
4670  assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
4671 
4672  // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
4673  DstReg = MI.getOperand(0).getReg();
4674  SrcReg = MI.getOperand(1).getReg();
4675 
4676  DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
4677 
4678  unsigned ImplicitSReg;
4679  if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
4680  break;
4681 
4682  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4683  MI.RemoveOperand(i - 1);
4684 
4685  // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
4686  // Again DDst may be undefined at the beginning of this instruction.
4687  MI.setDesc(get(ARM::VSETLNi32));
4688  MIB.addReg(DReg, RegState::Define)
4689  .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
4690  .addReg(SrcReg)
4691  .addImm(Lane)
4692  .add(predOps(ARMCC::AL));
4693 
4694  // The narrower destination must be marked as set to keep previous chains
4695  // in place.
4696  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4697  if (ImplicitSReg != 0)
4698  MIB.addReg(ImplicitSReg, RegState::Implicit);
4699  break;
4700  }
4701  case ARM::VMOVS: {
4702  if (Domain != ExeNEON)
4703  break;
4704 
4705  // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4706  DstReg = MI.getOperand(0).getReg();
4707  SrcReg = MI.getOperand(1).getReg();
4708 
4709  unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4710  DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4711  DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4712 
4713  unsigned ImplicitSReg;
4714  if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4715  break;
4716 
4717  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4718  MI.RemoveOperand(i - 1);
4719 
4720  if (DSrc == DDst) {
4721  // Destination can be:
4722  // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4723  MI.setDesc(get(ARM::VDUPLN32d));
4724  MIB.addReg(DDst, RegState::Define)
4725  .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
4726  .addImm(SrcLane)
4727  .add(predOps(ARMCC::AL));
4728 
4729  // Neither the source nor the destination is naturally represented any
4730  // more, so add them in manually.
4731  MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4732  MIB.addReg(SrcReg, RegState::Implicit);
4733  if (ImplicitSReg != 0)
4734  MIB.addReg(ImplicitSReg, RegState::Implicit);
4735  break;
4736  }
4737 
4738  // In general there's no single instruction that can perform an S <-> S
4739  // move in NEON space, but a pair of VEXT instructions *can* do the
4740  // job. It turns out that the VEXTs needed will only use DSrc once, with
4741  // the position based purely on the combination of lane-0 and lane-1
4742  // involved. For example
4743  // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
4744  // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
4745  // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
4746  // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
4747  //
4748  // Pattern of the MachineInstrs is:
4749  // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4750  MachineInstrBuilder NewMIB;
4751  NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
4752  DDst);
4753 
4754  // On the first instruction, both DSrc and DDst may be undef if present.
4755  // Specifically when the original instruction didn't have them as an
4756  // <imp-use>.
4757  unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
4758  bool CurUndef = !MI.readsRegister(CurReg, TRI);
4759  NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4760 
4761  CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
4762  CurUndef = !MI.readsRegister(CurReg, TRI);
4763  NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
4764  .addImm(1)
4765  .add(predOps(ARMCC::AL));
4766 
4767  if (SrcLane == DstLane)
4768  NewMIB.addReg(SrcReg, RegState::Implicit);
4769 
4770  MI.setDesc(get(ARM::VEXTd32));
4771  MIB.addReg(DDst, RegState::Define);
4772 
4773  // On the second instruction, DDst has definitely been defined above, so
4774  // it is not undef. DSrc, if present, can be undef as above.
4775  CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
4776  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4777  MIB.addReg(CurReg, getUndefRegState(CurUndef));
4778 
4779  CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
4780  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4781  MIB.addReg(CurReg, getUndefRegState(CurUndef))
4782  .addImm(1)
4783  .add(predOps(ARMCC::AL));
4784 
4785  if (SrcLane != DstLane)
4786  MIB.addReg(SrcReg, RegState::Implicit);
4787 
4788  // As before, the original destination is no longer represented, add it
4789  // implicitly.
4790  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4791  if (ImplicitSReg != 0)
4792  MIB.addReg(ImplicitSReg, RegState::Implicit);
4793  break;
4794  }
4795  }
4796 }
4797 
4798 //===----------------------------------------------------------------------===//
4799 // Partial register updates
4800 //===----------------------------------------------------------------------===//
4801 //
4802 // Swift renames NEON registers with 64-bit granularity. That means any
4803 // instruction writing an S-reg implicitly reads the containing D-reg. The
4804 // problem is mostly avoided by translating f32 operations to v2f32 operations
4805 // on D-registers, but f32 loads are still a problem.
4806 //
4807 // These instructions can load an f32 into a NEON register:
4808 //
4809 // VLDRS - Only writes S, partial D update.
4810 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4811 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4812 //
4813 // FCONSTD can be used as a dependency-breaking instruction.
4814 unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
4815  const MachineInstr &MI, unsigned OpNum,
4816  const TargetRegisterInfo *TRI) const {
4817  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
4818  if (!PartialUpdateClearance)
4819  return 0;
4820 
4821  assert(TRI && "Need TRI instance");
4822 
4823  const MachineOperand &MO = MI.getOperand(OpNum);
4824  if (MO.readsReg())
4825  return 0;
4826  unsigned Reg = MO.getReg();
4827  int UseOp = -1;
4828 
4829  switch (MI.getOpcode()) {
4830  // Normal instructions writing only an S-register.
4831  case ARM::VLDRS:
4832  case ARM::FCONSTS:
4833  case ARM::VMOVSR:
4834  case ARM::VMOVv8i8:
4835  case ARM::VMOVv4i16:
4836  case ARM::VMOVv2i32:
4837  case ARM::VMOVv2f32:
4838  case ARM::VMOVv1i64:
4839  UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
4840  break;
4841 
4842  // Explicitly reads the dependency.
4843  case ARM::VLD1LNd32:
4844  UseOp = 3;
4845  break;
4846  default:
4847  return 0;
4848  }
4849 
4850  // If this instruction actually reads a value from Reg, there is no unwanted
4851  // dependency.
4852  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
4853  return 0;
4854 
4855  // We must be able to clobber the whole D-reg.
4856  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4857  // Virtual register must be a def undef foo:ssub_0 operand.
4858  if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
4859  return 0;
4860  } else if (ARM::SPRRegClass.contains(Reg)) {
4861  // Physical register: MI must define the full D-reg.
4862  unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4863  &ARM::DPRRegClass);
4864  if (!DReg || !MI.definesRegister(DReg, TRI))
4865  return 0;
4866  }
4867 
4868  // MI has an unwanted D-register dependency.
4869  // Avoid defs in the previous N instructions.
4870  return PartialUpdateClearance;
4871 }
4872 
4873 // Break a partial register dependency after getPartialRegUpdateClearance
4874 // returned non-zero.
4875 void ARMBaseInstrInfo::breakPartialRegDependency(
4876  MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
4877  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
4878  assert(TRI && "Need TRI instance");
4879 
4880  const MachineOperand &MO = MI.getOperand(OpNum);
4881  unsigned Reg = MO.getReg();
4883  "Can't break virtual register dependencies.");
4884  unsigned DReg = Reg;
4885 
4886  // If MI defines an S-reg, find the corresponding D super-register.
4887  if (ARM::SPRRegClass.contains(Reg)) {
4888  DReg = ARM::D0 + (Reg - ARM::S0) / 2;
4889  assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
4890  }
4891 
4892  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
4893  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
4894 
4895  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
4896  // the full D-register by loading the same value to both lanes. The
4897  // instruction is micro-coded with 2 uops, so don't do this until we can
4898  // properly schedule micro-coded instructions. The dispatcher stalls cause
4899  // regressions that are too big.
4900 
4901  // Insert the dependency-breaking FCONSTD before MI.
4902  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
4903  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
4904  .addImm(96)
4905  .add(predOps(ARMCC::AL));
4906  MI.addRegisterKilled(DReg, TRI, true);
4907 }
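// Illustrative result (register names assumed): if MI is "vldr s1, [r0]", the
// code above inserts "vmov.f64 d0, #0.5" (FCONSTD, encoding 96) immediately
// before it, so the partial write to s1 no longer depends on the old d0 value.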
4908 
4909 bool ARMBaseInstrInfo::hasNOP() const {
4910  return Subtarget.getFeatureBits()[ARM::HasV6KOps];
4911 }
4912 
4913 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
4914  if (MI->getNumOperands() < 4)
4915  return true;
4916  unsigned ShOpVal = MI->getOperand(3).getImm();
4917  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
4918  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
4919  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
4920  ((ShImm == 1 || ShImm == 2) &&
4921  ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
4922  return true;
4923 
4924  return false;
4925 }
4926 
4927 bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
4928  const MachineInstr &MI, unsigned DefIdx,
4929  SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
4930  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4931  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
4932 
4933  switch (MI.getOpcode()) {
4934  case ARM::VMOVDRR:
4935  // dX = VMOVDRR rY, rZ
4936  // is the same as:
4937  // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
4938  // Populate the InputRegs accordingly.
4939  // rY
4940  const MachineOperand *MOReg = &MI.getOperand(1);
4941  if (!MOReg->isUndef())
4942  InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
4943  MOReg->getSubReg(), ARM::ssub_0));
4944  // rZ
4945  MOReg = &MI.getOperand(2);
4946  if (!MOReg->isUndef())
4947  InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
4948  MOReg->getSubReg(), ARM::ssub_1));
4949  return true;
4950  }
4951  llvm_unreachable("Target dependent opcode missing");
4952 }
4953 
4954 bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
4955  const MachineInstr &MI, unsigned DefIdx,
4956  RegSubRegPairAndIdx &InputReg) const {
4957  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4958  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
4959 
4960  switch (MI.getOpcode()) {
4961  case ARM::VMOVRRD:
4962  // rX, rY = VMOVRRD dZ
4963  // is the same as:
4964  // rX = EXTRACT_SUBREG dZ, ssub_0
4965  // rY = EXTRACT_SUBREG dZ, ssub_1
4966  const MachineOperand &MOReg = MI.getOperand(2);
4967  if (MOReg.isUndef())
4968  return false;
4969  InputReg.Reg = MOReg.getReg();
4970  InputReg.SubReg = MOReg.getSubReg();
4971  InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
4972  return true;
4973  }
4974  llvm_unreachable("Target dependent opcode missing");
4975 }
4976 
4977 bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
4978  const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
4979  RegSubRegPairAndIdx &InsertedReg) const {
4980  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4981  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
4982 
4983  switch (MI.getOpcode()) {
4984  case ARM::VSETLNi32:
4985  // dX = VSETLNi32 dY, rZ, imm
4986  const MachineOperand &MOBaseReg = MI.getOperand(1);
4987  const MachineOperand &MOInsertedReg = MI.getOperand(2);
4988  if (MOInsertedReg.isUndef())
4989  return false;
4990  const MachineOperand &MOIndex = MI.getOperand(3);
4991  BaseReg.Reg = MOBaseReg.getReg();
4992  BaseReg.SubReg = MOBaseReg.getSubReg();
4993 
4994  InsertedReg.Reg = MOInsertedReg.getReg();
4995  InsertedReg.SubReg = MOInsertedReg.getSubReg();
4996  InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
4997  return true;
4998  }
4999  llvm_unreachable("Target dependent opcode missing");
5000 }
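// Example (illustrative): for "d7 = VSETLNi32 d7, r1, 1" this reports
// BaseReg = d7 and InsertedReg = r1 at sub-index ssub_1, i.e. the equivalent
// INSERT_SUBREG of r1 into the upper lane of d7.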
bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const
MachineConstantPoolValue * MachineCPVal
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override
bool getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const override
Build the equivalent inputs of a INSERT_SUBREG for the given MI and DefIdx.
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register...
Definition: MachineInstr.h:970
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
bool checkVLDnAccessAlignment() const
Definition: ARMSubtarget.h:581
BranchProbability getCompl() const
int findFirstPredOperandIdx() const
Find the index of the first operand in the operand list that is used to represent the predicate...
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:461
instr_iterator instr_end()
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned getT2SOImmTwoPartFirst(unsigned Imm)
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isThumb() const
Definition: ARMSubtarget.h:677
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool DefinesPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred) const override
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
unsigned getRegister(unsigned i) const
Return the specified register in the class.
bool isExtractSubregLike(QueryType Type=IgnoreBundle) const
Return true if this instruction behaves the same way as the generic EXTRACT_SUBREG instructions...
Definition: MachineInstr.h:604
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:271
ARMConstantPoolValue - ARM specific constantpool value.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
bool expandPostRAPseudo(MachineInstr &MI) const override
void setIsUndef(bool Val=true)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, int CmpMask, bool CommonUse)
isSuitableForMask - Identify a suitable 'and' instruction that operates on the given source register ...
bool isPredicated(const MachineInstr &MI) const override
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones the instruction or the whole instruction bundle Orig and inserts it into MBB before InsertBefore...
unsigned getSubReg() const
bool isPredicable(QueryType Type=AllInBundle) const
Return true if this instruction has a predicate operand that controls execution.
Definition: MachineInstr.h:515
bool isV8EligibleForIT(const InstrType *Instr)
Definition: ARMFeatures.h:25
static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, unsigned SReg, unsigned &Lane)
bool isRegSequence() const
Definition: MachineInstr.h:852
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:302
bool isSOImmTwoPartVal(unsigned V)
isSOImmTwoPartVal - Return true if the specified value can be obtained by or'ing together two SOImmVa...
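A small sketch of how a constant V that passes this check splits into two shifter-operand immediates (for example to emit a MOV followed by an ORR); the assert only illustrates that the two chunks OR back together:

  if (ARM_AM::isSOImmTwoPartVal(V)) {
    unsigned Part1 = ARM_AM::getSOImmTwoPartFirst(V);
    unsigned Part2 = ARM_AM::getSOImmTwoPartSecond(V);
    assert((Part1 | Part2) == V && "two-part split must rebuild the constant");
  }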
VarInfo - This represents the regions where a virtual register is live in the program.
Definition: LiveVariables.h:79
A debug info location.
Definition: DebugLoc.h:34
void setIsDead(bool Val=true)
unsigned getPartialRegUpdateClearance(const MachineInstr &, unsigned, const TargetRegisterInfo *) const override
setjmp/longjmp based exceptions
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
bool mayLoad() const
Return true if this instruction could possibly read memory.
Definition: MCInstrDesc.h:387
unsigned getSOImmTwoPartSecond(unsigned V)
getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, return the second chunk of ...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:335
bool isCopyLike() const
Return true if the instruction behaves like a copy.
Definition: MachineInstr.h:874
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsRenamable(bool Val=true)
static bool isThumb(const MCSubtargetInfo &STI)
unsigned getSpillSize(const TargetRegisterClass &RC) const
Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1)
bool contains(unsigned Reg) const
Return true if the specified register is included in this register class.
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
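A brief sketch: manufacturing an implicit CPSR def operand and attaching it to an existing MachineInstr (MI is assumed to already live in a basic block):

  MI.addOperand(MachineOperand::CreateReg(ARM::CPSR, /*isDef=*/true,
                                          /*isImp=*/true));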
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool removeKill(MachineInstr &MI)
removeKill - Delete a kill corresponding to the specified machine instruction.
Definition: LiveVariables.h:94
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
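A short sketch of the insert() contract: the returned pair's second member is false when the key was already present, so the first mapping wins; Opcode and Idx are placeholder values assumed from surrounding code:

  DenseMap<unsigned, unsigned> OpcToIdx;
  auto Res = OpcToIdx.insert(std::make_pair(Opcode, Idx));
  if (!Res.second) {
    // Opcode was already mapped; Res.first->second holds the existing index.
  }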
void clearKillInfo()
Clears kill flags on all operands.
static bool isCPSRDefined(const MachineInstr &MI)
Can load/store 1 register/cycle.
Definition: ARMSubtarget.h:123
static uint32_t getAlignment(const MCSectionCOFF &Sec)
bool isFpMLxInstruction(unsigned Opcode) const
isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS instruction.
A description of a memory reference used in the backend.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
ShiftOpc getAM2ShiftOpc(unsigned AM2Opc)
unsigned getSOImmValRotate(unsigned Imm)
getSOImmValRotate - Try to handle Imm with an immediate shifter operand, computing the rotate amount ...
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:296
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void setImplicit(bool Val=true)
static bool isLoad(int Opcode)
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII)
rewriteARMFrameIndex / rewriteT2FrameIndex - Rewrite MI to access 'Offset' bytes from the FP...
Reg
All possible values of the reg field in the ModR/M byte.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
Definition: MachineInstr.h:477
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
const MachineInstrBuilder & AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI) const
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const
The memory access is dereferenceable (i.e., doesn't trap).
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:293
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
const InstrItineraryData * getInstrItineraries() const
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to determine (in conjunction w...
const char * getSymbolName() const
static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC)
getCmpToAddCondition - assume the flags are set by CMP(a,b), return the condition code if we modify t...
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
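A sketch of the usual spill-slot pattern (FrameIndex and MBB assumed from the surrounding spill code; API names as of LLVM 7):

  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIndex),
      MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex),
      MFI.getObjectAlignment(FrameIndex));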