1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the Base ARM implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMConstantPoolValue.h"
17 #include "ARMFeatures.h"
18 #include "ARMHazardRecognizer.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Triple.h"
43 #include "llvm/IR/Attributes.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DebugLoc.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/GlobalValue.h"
48 #include "llvm/MC/MCAsmInfo.h"
49 #include "llvm/MC/MCInstrDesc.h"
52 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <iterator>
63 #include <new>
64 #include <utility>
65 #include <vector>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "arm-instrinfo"
70 
71 #define GET_INSTRINFO_CTOR_DTOR
72 #include "ARMGenInstrInfo.inc"
73 
74 static cl::opt<bool>
75 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
76  cl::desc("Enable ARM 2-addr to 3-addr conv"));
77 
78 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
79 struct ARM_MLxEntry {
80  uint16_t MLxOpc; // MLA / MLS opcode
81  uint16_t MulOpc; // Expanded multiplication opcode
82  uint16_t AddSubOpc; // Expanded add / sub opcode
83  bool NegAcc; // True if the acc is negated before the add / sub.
84  bool HasLane; // True if instruction has an extra "lane" operand.
85 };
86 
87 static const ARM_MLxEntry ARM_MLxTable[] = {
88  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
89  // fp scalar ops
90  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
91  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
92  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
93  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
94  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
95  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
96  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
97  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
98 
99  // fp SIMD ops
100  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
101  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
102  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
103  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
104  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
105  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
106  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
107  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
108 };
109 
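// The constructor below builds two lookup structures from ARM_MLxTable:
// MLxEntryMap maps each MLx opcode to its index in the table (e.g.
// ARM::VMLAS -> 0, ARM::VNMLSD -> 7), and MLxHazardOpcodes collects the
// expanded multiply and add/sub opcodes so the hazard recognizer can treat
// them as potential MLx hazards.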
110 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
111  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
112  Subtarget(STI) {
113  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
114  if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
115  llvm_unreachable("Duplicated entries?");
116  MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
117  MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
118  }
119 }
120 
121 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
122 // currently defaults to no prepass hazard recognizer.
123 ScheduleHazardRecognizer *
124 ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
125  const ScheduleDAG *DAG) const {
126  if (usePreRAHazardRecognizer()) {
127  const InstrItineraryData *II =
128  static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
129  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
130  }
131  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
132 }
133 
134 ScheduleHazardRecognizer *ARMBaseInstrInfo::
135 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
136  const ScheduleDAG *DAG) const {
137  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
138  return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
139  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
140 }
141 
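// convertToThreeAddress splits a pre/post-indexed load or store into an
// un-indexed memory access plus an explicit ADD/SUB that computes the
// writeback register. Roughly (illustrative assembly, pre-indexed case):
//   ldr r0, [r1, #4]!   =>   add r1, r1, #4
//                            ldr r0, [r1]
// It bails out (returns nullptr) whenever the split would need more than one
// extra instruction.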
142 MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
143  MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
144  // FIXME: Thumb2 support.
145 
146  if (!EnableARM3Addr)
147  return nullptr;
148 
149  MachineFunction &MF = *MI.getParent()->getParent();
150  uint64_t TSFlags = MI.getDesc().TSFlags;
151  bool isPre = false;
152  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
153  default: return nullptr;
154  case ARMII::IndexModePre:
155  isPre = true;
156  break;
157  case ARMII::IndexModePost:
158  break;
159  }
160 
161  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
162  // operation.
163  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
164  if (MemOpc == 0)
165  return nullptr;
166 
167  MachineInstr *UpdateMI = nullptr;
168  MachineInstr *MemMI = nullptr;
169  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
170  const MCInstrDesc &MCID = MI.getDesc();
171  unsigned NumOps = MCID.getNumOperands();
172  bool isLoad = !MI.mayStore();
173  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
174  const MachineOperand &Base = MI.getOperand(2);
175  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
176  unsigned WBReg = WB.getReg();
177  unsigned BaseReg = Base.getReg();
178  unsigned OffReg = Offset.getReg();
179  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
180  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
181  switch (AddrMode) {
182  default: llvm_unreachable("Unknown indexed op!");
183  case ARMII::AddrMode2: {
184  bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
185  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
186  if (OffReg == 0) {
187  if (ARM_AM::getSOImmVal(Amt) == -1)
188  // Can't encode it in a so_imm operand. This transformation will
189  // add more than 1 instruction. Abandon!
190  return nullptr;
191  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
192  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
193  .addReg(BaseReg)
194  .addImm(Amt)
195  .add(predOps(Pred))
196  .add(condCodeOp());
197  } else if (Amt != 0) {
198  ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
199  unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
200  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
201  get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
202  .addReg(BaseReg)
203  .addReg(OffReg)
204  .addReg(0)
205  .addImm(SOOpc)
206  .add(predOps(Pred))
207  .add(condCodeOp());
208  } else
209  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
210  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
211  .addReg(BaseReg)
212  .addReg(OffReg)
213  .add(predOps(Pred))
214  .add(condCodeOp());
215  break;
216  }
217  case ARMII::AddrMode3 : {
218  bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
219  unsigned Amt = ARM_AM::getAM3Offset(OffImm);
220  if (OffReg == 0)
221  // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
222  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
223  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
224  .addReg(BaseReg)
225  .addImm(Amt)
226  .add(predOps(Pred))
227  .add(condCodeOp());
228  else
229  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
230  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
231  .addReg(BaseReg)
232  .addReg(OffReg)
233  .add(predOps(Pred))
234  .add(condCodeOp());
235  break;
236  }
237  }
238 
239  std::vector<MachineInstr*> NewMIs;
240  if (isPre) {
241  if (isLoad)
242  MemMI =
243  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
244  .addReg(WBReg)
245  .addImm(0)
246  .addImm(Pred);
247  else
248  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
249  .addReg(MI.getOperand(1).getReg())
250  .addReg(WBReg)
251  .addReg(0)
252  .addImm(0)
253  .addImm(Pred);
254  NewMIs.push_back(MemMI);
255  NewMIs.push_back(UpdateMI);
256  } else {
257  if (isLoad)
258  MemMI =
259  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
260  .addReg(BaseReg)
261  .addImm(0)
262  .addImm(Pred);
263  else
264  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
265  .addReg(MI.getOperand(1).getReg())
266  .addReg(BaseReg)
267  .addReg(0)
268  .addImm(0)
269  .addImm(Pred);
270  if (WB.isDead())
271  UpdateMI->getOperand(0).setIsDead();
272  NewMIs.push_back(UpdateMI);
273  NewMIs.push_back(MemMI);
274  }
275 
276  // Transfer LiveVariables states, kill / dead info.
277  if (LV) {
278  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
279  MachineOperand &MO = MI.getOperand(i);
280  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
281  unsigned Reg = MO.getReg();
282 
283  LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284  if (MO.isDef()) {
285  MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
286  if (MO.isDead())
287  LV->addVirtualRegisterDead(Reg, *NewMI);
288  }
289  if (MO.isUse() && MO.isKill()) {
290  for (unsigned j = 0; j < 2; ++j) {
291  // Look at the two new MI's in reverse order.
292  MachineInstr *NewMI = NewMIs[j];
293  if (!NewMI->readsRegister(Reg))
294  continue;
295  LV->addVirtualRegisterKilled(Reg, *NewMI);
296  if (VI.removeKill(MI))
297  VI.Kills.push_back(NewMI);
298  break;
299  }
300  }
301  }
302  }
303  }
304 
305  MachineBasicBlock::iterator MBBI = MI.getIterator();
306  MFI->insert(MBBI, NewMIs[1]);
307  MFI->insert(MBBI, NewMIs[0]);
308  return NewMIs[0];
309 }
310 
311 // Branch analysis.
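// analyzeBranch walks the terminators of MBB from the bottom up. On success
// it fills in TBB/FBB/Cond per the TargetInstrInfo contract:
//   - lone unconditional branch:   TBB = target, Cond empty
//   - lone conditional branch:     TBB = taken target, Cond = {cond, CPSR op}
//   - conditional + unconditional: TBB = taken target, FBB = fallthrough
// A return value of true means the branch sequence could not be analyzed.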
312 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
313  MachineBasicBlock *&TBB,
314  MachineBasicBlock *&FBB,
315  SmallVectorImpl<MachineOperand> &Cond,
316  bool AllowModify) const {
317  TBB = nullptr;
318  FBB = nullptr;
319 
320  MachineBasicBlock::iterator I = MBB.end();
321  if (I == MBB.begin())
322  return false; // Empty blocks are easy.
323  --I;
324 
325  // Walk backwards from the end of the basic block until the branch is
326  // analyzed or we give up.
327  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
328  // Flag to be raised on unanalyzeable instructions. This is useful in cases
329  // where we want to clean up on the end of the basic block before we bail
330  // out.
331  bool CantAnalyze = false;
332 
333  // Skip over DEBUG values and predicated nonterminators.
334  while (I->isDebugValue() || !I->isTerminator()) {
335  if (I == MBB.begin())
336  return false;
337  --I;
338  }
339 
340  if (isIndirectBranchOpcode(I->getOpcode()) ||
341  isJumpTableBranchOpcode(I->getOpcode())) {
342  // Indirect branches and jump tables can't be analyzed, but we still want
343  // to clean up any instructions at the tail of the basic block.
344  CantAnalyze = true;
345  } else if (isUncondBranchOpcode(I->getOpcode())) {
346  TBB = I->getOperand(0).getMBB();
347  } else if (isCondBranchOpcode(I->getOpcode())) {
348  // Bail out if we encounter multiple conditional branches.
349  if (!Cond.empty())
350  return true;
351 
352  assert(!FBB && "FBB should have been null.");
353  FBB = TBB;
354  TBB = I->getOperand(0).getMBB();
355  Cond.push_back(I->getOperand(1));
356  Cond.push_back(I->getOperand(2));
357  } else if (I->isReturn()) {
358  // Returns can't be analyzed, but we should run cleanup.
359  CantAnalyze = !isPredicated(*I);
360  } else {
361  // We encountered an unrecognized terminator. Bail out immediately.
362  return true;
363  }
364 
365  // Cleanup code - to be run for unpredicated unconditional branches and
366  // returns.
367  if (!isPredicated(*I) &&
368  (isUncondBranchOpcode(I->getOpcode()) ||
369  isIndirectBranchOpcode(I->getOpcode()) ||
370  isJumpTableBranchOpcode(I->getOpcode()) ||
371  I->isReturn())) {
372  // Forget any previous condition branch information - it no longer applies.
373  Cond.clear();
374  FBB = nullptr;
375 
376  // If we can modify the function, delete everything below this
377  // unconditional branch.
378  if (AllowModify) {
379  MachineBasicBlock::iterator DI = std::next(I);
380  while (DI != MBB.end()) {
381  MachineInstr &InstToDelete = *DI;
382  ++DI;
383  InstToDelete.eraseFromParent();
384  }
385  }
386  }
387 
388  if (CantAnalyze)
389  return true;
390 
391  if (I == MBB.begin())
392  return false;
393 
394  --I;
395  }
396 
397  // We made it past the terminators without bailing out - we must have
398  // analyzed this branch successfully.
399  return false;
400 }
401 
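// removeBranch deletes a trailing unconditional branch and, if present, the
// conditional branch before it, returning the number of branches removed
// (0, 1 or 2). BytesRemoved is not supported, hence the assert.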
402 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
403  int *BytesRemoved) const {
404  assert(!BytesRemoved && "code size not handled");
405 
406  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
407  if (I == MBB.end())
408  return 0;
409 
410  if (!isUncondBranchOpcode(I->getOpcode()) &&
411  !isCondBranchOpcode(I->getOpcode()))
412  return 0;
413 
414  // Remove the branch.
415  I->eraseFromParent();
416 
417  I = MBB.end();
418 
419  if (I == MBB.begin()) return 1;
420  --I;
421  if (!isCondBranchOpcode(I->getOpcode()))
422  return 1;
423 
424  // Remove the branch.
425  I->eraseFromParent();
426  return 2;
427 }
428 
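// insertBranch materializes the TBB/FBB/Cond triple produced by
// analyzeBranch, choosing B/tB/t2B and Bcc/tBcc/t2Bcc from the function's
// ISA. For a hypothetical two-way branch with Cond = {ARMCC::EQ, CPSR} in
// ARM mode this emits:
//   Bcc <TBB>   (predicated on EQ / CPSR)
//   B   <FBB>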
429 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
430  MachineBasicBlock *TBB,
431  MachineBasicBlock *FBB,
432  ArrayRef<MachineOperand> Cond,
433  const DebugLoc &DL,
434  int *BytesAdded) const {
435  assert(!BytesAdded && "code size not handled");
436  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
437  int BOpc = !AFI->isThumbFunction()
438  ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
439  int BccOpc = !AFI->isThumbFunction()
440  ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
441  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
442 
443  // Shouldn't be a fall through.
444  assert(TBB && "insertBranch must not be told to insert a fallthrough");
445  assert((Cond.size() == 2 || Cond.size() == 0) &&
446  "ARM branch conditions have two components!");
447 
448  // For conditional branches, we use addOperand to preserve CPSR flags.
449 
450  if (!FBB) {
451  if (Cond.empty()) { // Unconditional branch?
452  if (isThumb)
453  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
454  else
455  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
456  } else
457  BuildMI(&MBB, DL, get(BccOpc))
458  .addMBB(TBB)
459  .addImm(Cond[0].getImm())
460  .add(Cond[1]);
461  return 1;
462  }
463 
464  // Two-way conditional branch.
465  BuildMI(&MBB, DL, get(BccOpc))
466  .addMBB(TBB)
467  .addImm(Cond[0].getImm())
468  .add(Cond[1]);
469  if (isThumb)
470  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
471  else
472  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
473  return 2;
474 }
475 
476 bool ARMBaseInstrInfo::
477 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
478  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
479  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
480  return false;
481 }
482 
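// isPredicated: a bundle is considered predicated if any instruction inside
// it carries a predicate other than AL; an ordinary instruction is
// predicated when its first predicate operand is not ARMCC::AL.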
483 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
484  if (MI.isBundle()) {
485  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
486  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
487  while (++I != E && I->isInsideBundle()) {
488  int PIdx = I->findFirstPredOperandIdx();
489  if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
490  return true;
491  }
492  return false;
493  }
494 
495  int PIdx = MI.findFirstPredOperandIdx();
496  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
497 }
498 
499 bool ARMBaseInstrInfo::PredicateInstruction(
500  MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
501  unsigned Opc = MI.getOpcode();
502  if (isUncondBranchOpcode(Opc)) {
503  MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
504  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
505  .addImm(Pred[0].getImm())
506  .addReg(Pred[1].getReg());
507  return true;
508  }
509 
510  int PIdx = MI.findFirstPredOperandIdx();
511  if (PIdx != -1) {
512  MachineOperand &PMO = MI.getOperand(PIdx);
513  PMO.setImm(Pred[0].getImm());
514  MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
515  return true;
516  }
517  return false;
518 }
519 
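// SubsumesPredicate returns true when everything executed under Pred2 would
// also execute under Pred1: equal conditions, Pred1 == AL, or one of a few
// strict implications (e.g. HS subsumes HI, GE subsumes GT, LS subsumes LO
// and EQ).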
520 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
521  ArrayRef<MachineOperand> Pred2) const {
522  if (Pred1.size() > 2 || Pred2.size() > 2)
523  return false;
524 
525  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
526  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
527  if (CC1 == CC2)
528  return true;
529 
530  switch (CC1) {
531  default:
532  return false;
533  case ARMCC::AL:
534  return true;
535  case ARMCC::HS:
536  return CC2 == ARMCC::HI;
537  case ARMCC::LS:
538  return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
539  case ARMCC::GE:
540  return CC2 == ARMCC::GT;
541  case ARMCC::LE:
542  return CC2 == ARMCC::LT;
543  }
544 }
545 
546 bool ARMBaseInstrInfo::DefinesPredicate(
547  MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
548  bool Found = false;
549  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
550  const MachineOperand &MO = MI.getOperand(i);
551  if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
552  (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
553  Pred.push_back(MO);
554  Found = true;
555  }
556  }
557 
558  return Found;
559 }
560 
561 bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
562  for (const auto &MO : MI.operands())
563  if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
564  return true;
565  return false;
566 }
567 
569  unsigned Op) const {
570  const MachineOperand &Offset = MI.getOperand(Op + 1);
571  return Offset.getReg() != 0;
572 }
573 
574 // Load with negative register offset requires additional 1cyc and +I unit
575 // for Cortex A57
577  unsigned Op) const {
578  const MachineOperand &Offset = MI.getOperand(Op + 1);
579  const MachineOperand &Opc = MI.getOperand(Op + 2);
580  assert(Opc.isImm());
581  assert(Offset.isReg());
582  int64_t OpcImm = Opc.getImm();
583 
584  bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
585  return (isSub && Offset.getReg() != 0);
586 }
587 
589  unsigned Op) const {
590  const MachineOperand &Opc = MI.getOperand(Op + 2);
591  unsigned OffImm = Opc.getImm();
592  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
593 }
594 
595 // Load, scaled register offset, not plus LSL2
597  unsigned Op) const {
598  const MachineOperand &Opc = MI.getOperand(Op + 2);
599  unsigned OffImm = Opc.getImm();
600 
601  bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
602  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
603  ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
604  if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
605  bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
606  return !SimpleScaled;
607 }
608 
609 // Minus reg for ldstso addr mode
611  unsigned Op) const {
612  unsigned OffImm = MI.getOperand(Op + 2).getImm();
613  return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
614 }
615 
616 // Load, scaled register offset
618  unsigned Op) const {
619  unsigned OffImm = MI.getOperand(Op + 2).getImm();
620  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
621 }
622 
623 static bool isEligibleForITBlock(const MachineInstr *MI) {
624  switch (MI->getOpcode()) {
625  default: return true;
626  case ARM::tADC: // ADC (register) T1
627  case ARM::tADDi3: // ADD (immediate) T1
628  case ARM::tADDi8: // ADD (immediate) T2
629  case ARM::tADDrr: // ADD (register) T1
630  case ARM::tAND: // AND (register) T1
631  case ARM::tASRri: // ASR (immediate) T1
632  case ARM::tASRrr: // ASR (register) T1
633  case ARM::tBIC: // BIC (register) T1
634  case ARM::tEOR: // EOR (register) T1
635  case ARM::tLSLri: // LSL (immediate) T1
636  case ARM::tLSLrr: // LSL (register) T1
637  case ARM::tLSRri: // LSR (immediate) T1
638  case ARM::tLSRrr: // LSR (register) T1
639  case ARM::tMUL: // MUL T1
640  case ARM::tMVN: // MVN (register) T1
641  case ARM::tORR: // ORR (register) T1
642  case ARM::tROR: // ROR (register) T1
643  case ARM::tRSB: // RSB (immediate) T1
644  case ARM::tSBC: // SBC (register) T1
645  case ARM::tSUBi3: // SUB (immediate) T1
646  case ARM::tSUBi8: // SUB (immediate) T2
647  case ARM::tSUBrr: // SUB (register) T1
648  return !ARMBaseInstrInfo::isCPSRDefined(*MI);
649  }
650 }
651 
652 /// isPredicable - Return true if the specified instruction can be predicated.
653 /// By default, this returns true for every instruction with a
654 /// PredicateOperand.
655 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
656  if (!MI.isPredicable())
657  return false;
658 
659  if (MI.isBundle())
660  return false;
661 
662  if (!isEligibleForITBlock(&MI))
663  return false;
664 
665  const ARMFunctionInfo *AFI =
666  MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
667 
668  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
669  // In their ARM encoding, they can't be encoded in a conditional form.
670  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
671  return false;
672 
673  if (AFI->isThumb2Function()) {
674  if (getSubtarget().restrictIT())
675  return isV8EligibleForIT(&MI);
676  }
677 
678  return true;
679 }
680 
681 namespace llvm {
682 
683 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
684  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
685  const MachineOperand &MO = MI->getOperand(i);
686  if (!MO.isReg() || MO.isUndef() || MO.isUse())
687  continue;
688  if (MO.getReg() != ARM::CPSR)
689  continue;
690  if (!MO.isDead())
691  return false;
692  }
693  // all definitions of CPSR are dead
694  return true;
695 }
696 
697 } // end namespace llvm
698 
699 /// GetInstSize - Return the size of the specified MachineInstr.
700 ///
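/// Most sizes come straight from the MCInstrDesc. Pseudo-instructions report
/// 0, inline asm is measured from its string, constant-pool and jump-table
/// entries carry their size in operand #2, and the SJLJ setjmp/longjmp
/// pseudos use hard-coded expansion sizes.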
701 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
702  const MachineBasicBlock &MBB = *MI.getParent();
703  const MachineFunction *MF = MBB.getParent();
704  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
705 
706  const MCInstrDesc &MCID = MI.getDesc();
707  if (MCID.getSize())
708  return MCID.getSize();
709 
710  // If this machine instr is an inline asm, measure it.
711  if (MI.getOpcode() == ARM::INLINEASM)
712  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
713  unsigned Opc = MI.getOpcode();
714  switch (Opc) {
715  default:
716  // pseudo-instruction sizes are zero.
717  return 0;
718  case TargetOpcode::BUNDLE:
719  return getInstBundleLength(MI);
720  case ARM::MOVi16_ga_pcrel:
721  case ARM::MOVTi16_ga_pcrel:
722  case ARM::t2MOVi16_ga_pcrel:
723  case ARM::t2MOVTi16_ga_pcrel:
724  return 4;
725  case ARM::MOVi32imm:
726  case ARM::t2MOVi32imm:
727  return 8;
728  case ARM::CONSTPOOL_ENTRY:
729  case ARM::JUMPTABLE_INSTS:
730  case ARM::JUMPTABLE_ADDRS:
731  case ARM::JUMPTABLE_TBB:
732  case ARM::JUMPTABLE_TBH:
733  // If this machine instr is a constant pool entry, its size is recorded as
734  // operand #2.
735  return MI.getOperand(2).getImm();
736  case ARM::Int_eh_sjlj_longjmp:
737  return 16;
738  case ARM::tInt_eh_sjlj_longjmp:
739  return 10;
740  case ARM::tInt_WIN_eh_sjlj_longjmp:
741  return 12;
742  case ARM::Int_eh_sjlj_setjmp:
743  case ARM::Int_eh_sjlj_setjmp_nofp:
744  return 20;
745  case ARM::tInt_eh_sjlj_setjmp:
746  case ARM::t2Int_eh_sjlj_setjmp:
747  case ARM::t2Int_eh_sjlj_setjmp_nofp:
748  return 12;
749  case ARM::SPACE:
750  return MI.getOperand(1).getImm();
751  }
752 }
753 
754 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
755  unsigned Size = 0;
756  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
757  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
758  while (++I != E && I->isInsideBundle()) {
759  assert(!I->isBundle() && "No nested bundle!");
760  Size += getInstSizeInBytes(*I);
761  }
762  return Size;
763 }
764 
765 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
766  MachineBasicBlock::iterator I,
767  unsigned DestReg, bool KillSrc,
768  const ARMSubtarget &Subtarget) const {
769  unsigned Opc = Subtarget.isThumb()
770  ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
771  : ARM::MRS;
772 
773  MachineInstrBuilder MIB =
774  BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
775 
776  // There is only 1 A/R class MRS instruction, and it always refers to
777  // APSR. However, there are lots of other possibilities on M-class cores.
778  if (Subtarget.isMClass())
779  MIB.addImm(0x800);
780 
781  MIB.add(predOps(ARMCC::AL))
782  .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
783 }
784 
785 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
786  MachineBasicBlock::iterator I,
787  unsigned SrcReg, bool KillSrc,
788  const ARMSubtarget &Subtarget) const {
789  unsigned Opc = Subtarget.isThumb()
790  ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
791  : ARM::MSR;
792 
793  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
794 
795  if (Subtarget.isMClass())
796  MIB.addImm(0x800);
797  else
798  MIB.addImm(8);
799 
800  MIB.addReg(SrcReg, getKillRegState(KillSrc))
801  .add(predOps(ARMCC::AL))
802  .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
803 }
804 
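// copyPhysReg first tries a single-instruction copy (MOVr, VMOVS, VMOVRS,
// VMOVSR, VMOVD or VORRq). Larger register tuples (QQ, QQQQ, DPair/DTriple/
// DQuad, GPRPair, spaced D-tuples) are decomposed into one move per
// sub-register, iterating backwards when the first destination sub-register
// would overlap the source.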
805 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
806  MachineBasicBlock::iterator I,
807  const DebugLoc &DL, unsigned DestReg,
808  unsigned SrcReg, bool KillSrc) const {
809  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
810  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
811 
812  if (GPRDest && GPRSrc) {
813  BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
814  .addReg(SrcReg, getKillRegState(KillSrc))
815  .add(predOps(ARMCC::AL))
816  .add(condCodeOp());
817  return;
818  }
819 
820  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
821  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
822 
823  unsigned Opc = 0;
824  if (SPRDest && SPRSrc)
825  Opc = ARM::VMOVS;
826  else if (GPRDest && SPRSrc)
827  Opc = ARM::VMOVRS;
828  else if (SPRDest && GPRSrc)
829  Opc = ARM::VMOVSR;
830  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
831  Opc = ARM::VMOVD;
832  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
833  Opc = ARM::VORRq;
834 
835  if (Opc) {
836  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
837  MIB.addReg(SrcReg, getKillRegState(KillSrc));
838  if (Opc == ARM::VORRq)
839  MIB.addReg(SrcReg, getKillRegState(KillSrc));
840  MIB.add(predOps(ARMCC::AL));
841  return;
842  }
843 
844  // Handle register classes that require multiple instructions.
845  unsigned BeginIdx = 0;
846  unsigned SubRegs = 0;
847  int Spacing = 1;
848 
849  // Use VORRq when possible.
850  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
851  Opc = ARM::VORRq;
852  BeginIdx = ARM::qsub_0;
853  SubRegs = 2;
854  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
855  Opc = ARM::VORRq;
856  BeginIdx = ARM::qsub_0;
857  SubRegs = 4;
858  // Fall back to VMOVD.
859  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
860  Opc = ARM::VMOVD;
861  BeginIdx = ARM::dsub_0;
862  SubRegs = 2;
863  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
864  Opc = ARM::VMOVD;
865  BeginIdx = ARM::dsub_0;
866  SubRegs = 3;
867  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
868  Opc = ARM::VMOVD;
869  BeginIdx = ARM::dsub_0;
870  SubRegs = 4;
871  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
872  Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
873  BeginIdx = ARM::gsub_0;
874  SubRegs = 2;
875  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
876  Opc = ARM::VMOVD;
877  BeginIdx = ARM::dsub_0;
878  SubRegs = 2;
879  Spacing = 2;
880  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
881  Opc = ARM::VMOVD;
882  BeginIdx = ARM::dsub_0;
883  SubRegs = 3;
884  Spacing = 2;
885  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
886  Opc = ARM::VMOVD;
887  BeginIdx = ARM::dsub_0;
888  SubRegs = 4;
889  Spacing = 2;
890  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
891  Opc = ARM::VMOVS;
892  BeginIdx = ARM::ssub_0;
893  SubRegs = 2;
894  } else if (SrcReg == ARM::CPSR) {
895  copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
896  return;
897  } else if (DestReg == ARM::CPSR) {
898  copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
899  return;
900  }
901 
902  assert(Opc && "Impossible reg-to-reg copy");
903 
904  const TargetRegisterInfo *TRI = &getRegisterInfo();
905  MachineInstrBuilder Mov;
906 
907  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
908  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
909  BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
910  Spacing = -Spacing;
911  }
912 #ifndef NDEBUG
913  SmallSet<unsigned, 4> DstRegs;
914 #endif
915  for (unsigned i = 0; i != SubRegs; ++i) {
916  unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
917  unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
918  assert(Dst && Src && "Bad sub-register");
919 #ifndef NDEBUG
920  assert(!DstRegs.count(Src) && "destructive vector copy");
921  DstRegs.insert(Dst);
922 #endif
923  Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
924  // VORR takes two source operands.
925  if (Opc == ARM::VORRq)
926  Mov.addReg(Src);
927  Mov = Mov.add(predOps(ARMCC::AL));
928  // MOVr can set CC.
929  if (Opc == ARM::MOVr)
930  Mov = Mov.add(condCodeOp());
931  }
932  // Add implicit super-register defs and kills to the last instruction.
933  Mov->addRegisterDefined(DestReg, TRI);
934  if (KillSrc)
935  Mov->addRegisterKilled(SrcReg, TRI);
936 }
937 
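// AddDReg appends one piece of a larger register to MIB: the concrete
// sub-register when Reg is physical, or Reg plus a sub-register index when
// it is still virtual. The spill/reload code below uses it to name the
// individual D or GPR halves of register tuples.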
938 const MachineInstrBuilder &
939 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
940  unsigned SubIdx, unsigned State,
941  const TargetRegisterInfo *TRI) const {
942  if (!SubIdx)
943  return MIB.addReg(Reg, State);
944 
945  if (TargetRegisterInfo::isPhysicalRegister(Reg))
946  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
947  return MIB.addReg(Reg, State, SubIdx);
948 }
949 
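// storeRegToStackSlot dispatches on the spill size of the register class:
// 4 bytes use STRi12/VSTRS, 8 bytes use VSTRD (or STRD/STMIA for GPR pairs),
// 16/24/32 bytes use the 16-byte-aligned VST1 forms when the slot can be
// realigned and VSTM pseudos otherwise, and 64 bytes spill all eight D
// sub-registers with VSTMDIA.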
950 void ARMBaseInstrInfo::
951 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
952  unsigned SrcReg, bool isKill, int FI,
953  const TargetRegisterClass *RC,
954  const TargetRegisterInfo *TRI) const {
955  DebugLoc DL;
956  if (I != MBB.end()) DL = I->getDebugLoc();
957  MachineFunction &MF = *MBB.getParent();
958  MachineFrameInfo &MFI = MF.getFrameInfo();
959  unsigned Align = MFI.getObjectAlignment(FI);
960 
960 
961  MachineMemOperand *MMO = MF.getMachineMemOperand(
962  MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
963  MFI.getObjectSize(FI), Align);
964 
965  switch (TRI->getSpillSize(*RC)) {
966  case 4:
967  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
968  BuildMI(MBB, I, DL, get(ARM::STRi12))
969  .addReg(SrcReg, getKillRegState(isKill))
970  .addFrameIndex(FI)
971  .addImm(0)
972  .addMemOperand(MMO)
973  .add(predOps(ARMCC::AL));
974  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
975  BuildMI(MBB, I, DL, get(ARM::VSTRS))
976  .addReg(SrcReg, getKillRegState(isKill))
977  .addFrameIndex(FI)
978  .addImm(0)
979  .addMemOperand(MMO)
980  .add(predOps(ARMCC::AL));
981  } else
982  llvm_unreachable("Unknown reg class!");
983  break;
984  case 8:
985  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
986  BuildMI(MBB, I, DL, get(ARM::VSTRD))
987  .addReg(SrcReg, getKillRegState(isKill))
988  .addFrameIndex(FI)
989  .addImm(0)
990  .addMemOperand(MMO)
991  .add(predOps(ARMCC::AL));
992  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
993  if (Subtarget.hasV5TEOps()) {
994  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
995  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
996  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
997  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
998  .add(predOps(ARMCC::AL));
999  } else {
1000  // Fallback to STM instruction, which has existed since the dawn of
1001  // time.
1002  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
1003  .addFrameIndex(FI)
1004  .addMemOperand(MMO)
1005  .add(predOps(ARMCC::AL));
1006  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1007  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1008  }
1009  } else
1010  llvm_unreachable("Unknown reg class!");
1011  break;
1012  case 16:
1013  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1014  // Use aligned spills if the stack can be realigned.
1015  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1016  BuildMI(MBB, I, DL, get(ARM::VST1q64))
1017  .addFrameIndex(FI)
1018  .addImm(16)
1019  .addReg(SrcReg, getKillRegState(isKill))
1020  .addMemOperand(MMO)
1021  .add(predOps(ARMCC::AL));
1022  } else {
1023  BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
1024  .addReg(SrcReg, getKillRegState(isKill))
1025  .addFrameIndex(FI)
1026  .addMemOperand(MMO)
1027  .add(predOps(ARMCC::AL));
1028  }
1029  } else
1030  llvm_unreachable("Unknown reg class!");
1031  break;
1032  case 24:
1033  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1034  // Use aligned spills if the stack can be realigned.
1035  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1036  BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
1037  .addFrameIndex(FI)
1038  .addImm(16)
1039  .addReg(SrcReg, getKillRegState(isKill))
1040  .addMemOperand(MMO)
1041  .add(predOps(ARMCC::AL));
1042  } else {
1043  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1044  .addFrameIndex(FI)
1045  .add(predOps(ARMCC::AL))
1046  .addMemOperand(MMO);
1047  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1048  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1049  AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1050  }
1051  } else
1052  llvm_unreachable("Unknown reg class!");
1053  break;
1054  case 32:
1055  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1056  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1057  // FIXME: It's possible to only store part of the QQ register if the
1058  // spilled def has a sub-register index.
1059  BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
1060  .addFrameIndex(FI)
1061  .addImm(16)
1062  .addReg(SrcReg, getKillRegState(isKill))
1063  .addMemOperand(MMO)
1064  .add(predOps(ARMCC::AL));
1065  } else {
1066  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1067  .addFrameIndex(FI)
1068  .add(predOps(ARMCC::AL))
1069  .addMemOperand(MMO);
1070  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1071  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1072  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1073  AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1074  }
1075  } else
1076  llvm_unreachable("Unknown reg class!");
1077  break;
1078  case 64:
1079  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1080  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1081  .addFrameIndex(FI)
1082  .add(predOps(ARMCC::AL))
1083  .addMemOperand(MMO);
1084  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1085  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1086  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1087  MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1088  MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1089  MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1090  MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1091  AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1092  } else
1093  llvm_unreachable("Unknown reg class!");
1094  break;
1095  default:
1096  llvm_unreachable("Unknown reg class!");
1097  }
1098 }
1099 
1100 unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1101  int &FrameIndex) const {
1102  switch (MI.getOpcode()) {
1103  default: break;
1104  case ARM::STRrs:
1105  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1106  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1107  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1108  MI.getOperand(3).getImm() == 0) {
1109  FrameIndex = MI.getOperand(1).getIndex();
1110  return MI.getOperand(0).getReg();
1111  }
1112  break;
1113  case ARM::STRi12:
1114  case ARM::t2STRi12:
1115  case ARM::tSTRspi:
1116  case ARM::VSTRD:
1117  case ARM::VSTRS:
1118  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1119  MI.getOperand(2).getImm() == 0) {
1120  FrameIndex = MI.getOperand(1).getIndex();
1121  return MI.getOperand(0).getReg();
1122  }
1123  break;
1124  case ARM::VST1q64:
1125  case ARM::VST1d64TPseudo:
1126  case ARM::VST1d64QPseudo:
1127  if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1128  FrameIndex = MI.getOperand(0).getIndex();
1129  return MI.getOperand(2).getReg();
1130  }
1131  break;
1132  case ARM::VSTMQIA:
1133  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1134  FrameIndex = MI.getOperand(1).getIndex();
1135  return MI.getOperand(0).getReg();
1136  }
1137  break;
1138  }
1139 
1140  return 0;
1141 }
1142 
1143 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1144  int &FrameIndex) const {
1145  const MachineMemOperand *Dummy;
1146  return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
1147 }
1148 
1149 void ARMBaseInstrInfo::
1150 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1151  unsigned DestReg, int FI,
1152  const TargetRegisterClass *RC,
1153  const TargetRegisterInfo *TRI) const {
1154  DebugLoc DL;
1155  if (I != MBB.end()) DL = I->getDebugLoc();
1156  MachineFunction &MF = *MBB.getParent();
1157  MachineFrameInfo &MFI = MF.getFrameInfo();
1158  unsigned Align = MFI.getObjectAlignment(FI);
1159  MachineMemOperand *MMO = MF.getMachineMemOperand(
1160  MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1161  MFI.getObjectSize(FI), Align);
1162 
1163  switch (TRI->getSpillSize(*RC)) {
1164  case 4:
1165  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1166  BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1167  .addFrameIndex(FI)
1168  .addImm(0)
1169  .addMemOperand(MMO)
1170  .add(predOps(ARMCC::AL));
1171 
1172  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1173  BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1174  .addFrameIndex(FI)
1175  .addImm(0)
1176  .addMemOperand(MMO)
1177  .add(predOps(ARMCC::AL));
1178  } else
1179  llvm_unreachable("Unknown reg class!");
1180  break;
1181  case 8:
1182  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1183  BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1184  .addFrameIndex(FI)
1185  .addImm(0)
1186  .addMemOperand(MMO)
1187  .add(predOps(ARMCC::AL));
1188  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1189  MachineInstrBuilder MIB;
1190 
1191  if (Subtarget.hasV5TEOps()) {
1192  MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1193  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1194  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1195  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1196  .add(predOps(ARMCC::AL));
1197  } else {
1198  // Fallback to LDM instruction, which has existed since the dawn of
1199  // time.
1200  MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1201  .addFrameIndex(FI)
1202  .addMemOperand(MMO)
1203  .add(predOps(ARMCC::AL));
1204  MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1205  MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1206  }
1207 
1208  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1209  MIB.addReg(DestReg, RegState::ImplicitDefine);
1210  } else
1211  llvm_unreachable("Unknown reg class!");
1212  break;
1213  case 16:
1214  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1215  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1216  BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1217  .addFrameIndex(FI)
1218  .addImm(16)
1219  .addMemOperand(MMO)
1220  .add(predOps(ARMCC::AL));
1221  } else {
1222  BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1223  .addFrameIndex(FI)
1224  .addMemOperand(MMO)
1225  .add(predOps(ARMCC::AL));
1226  }
1227  } else
1228  llvm_unreachable("Unknown reg class!");
1229  break;
1230  case 24:
1231  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1232  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1233  BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1234  .addFrameIndex(FI)
1235  .addImm(16)
1236  .addMemOperand(MMO)
1237  .add(predOps(ARMCC::AL));
1238  } else {
1239  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1240  .addFrameIndex(FI)
1241  .addMemOperand(MMO)
1242  .add(predOps(ARMCC::AL));
1243  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1244  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1245  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1246  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1247  MIB.addReg(DestReg, RegState::ImplicitDefine);
1248  }
1249  } else
1250  llvm_unreachable("Unknown reg class!");
1251  break;
1252  case 32:
1253  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1254  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1255  BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1256  .addFrameIndex(FI)
1257  .addImm(16)
1258  .addMemOperand(MMO)
1259  .add(predOps(ARMCC::AL));
1260  } else {
1261  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1262  .addFrameIndex(FI)
1263  .add(predOps(ARMCC::AL))
1264  .addMemOperand(MMO);
1265  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1266  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1267  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1268  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1269  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1270  MIB.addReg(DestReg, RegState::ImplicitDefine);
1271  }
1272  } else
1273  llvm_unreachable("Unknown reg class!");
1274  break;
1275  case 64:
1276  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1277  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1278  .addFrameIndex(FI)
1279  .add(predOps(ARMCC::AL))
1280  .addMemOperand(MMO);
1281  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1282  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1283  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1284  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1285  MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1286  MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1287  MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1288  MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1289  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1290  MIB.addReg(DestReg, RegState::ImplicitDefine);
1291  } else
1292  llvm_unreachable("Unknown reg class!");
1293  break;
1294  default:
1295  llvm_unreachable("Unknown regclass!");
1296  }
1297 }
1298 
1299 unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1300  int &FrameIndex) const {
1301  switch (MI.getOpcode()) {
1302  default: break;
1303  case ARM::LDRrs:
1304  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1305  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1306  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1307  MI.getOperand(3).getImm() == 0) {
1308  FrameIndex = MI.getOperand(1).getIndex();
1309  return MI.getOperand(0).getReg();
1310  }
1311  break;
1312  case ARM::LDRi12:
1313  case ARM::t2LDRi12:
1314  case ARM::tLDRspi:
1315  case ARM::VLDRD:
1316  case ARM::VLDRS:
1317  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1318  MI.getOperand(2).getImm() == 0) {
1319  FrameIndex = MI.getOperand(1).getIndex();
1320  return MI.getOperand(0).getReg();
1321  }
1322  break;
1323  case ARM::VLD1q64:
1324  case ARM::VLD1d64TPseudo:
1325  case ARM::VLD1d64QPseudo:
1326  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1327  FrameIndex = MI.getOperand(1).getIndex();
1328  return MI.getOperand(0).getReg();
1329  }
1330  break;
1331  case ARM::VLDMQIA:
1332  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1333  FrameIndex = MI.getOperand(1).getIndex();
1334  return MI.getOperand(0).getReg();
1335  }
1336  break;
1337  }
1338 
1339  return 0;
1340 }
1341 
1342 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1343  int &FrameIndex) const {
1344  const MachineMemOperand *Dummy;
1345  return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
1346 }
1347 
1348 /// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1349 /// depending on whether the result is used.
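/// The scratch registers attached to the MEMCPY pseudo are sorted into
/// ascending encoding order below so that the resulting LDM/STM register
/// lists are valid.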
1350 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1351  bool isThumb1 = Subtarget.isThumb1Only();
1352  bool isThumb2 = Subtarget.isThumb2();
1353  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1354 
1355  DebugLoc dl = MI->getDebugLoc();
1356  MachineBasicBlock *BB = MI->getParent();
1357 
1358  MachineInstrBuilder LDM, STM;
1359  if (isThumb1 || !MI->getOperand(1).isDead()) {
1360  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1361  : isThumb1 ? ARM::tLDMIA_UPD
1362  : ARM::LDMIA_UPD))
1363  .add(MI->getOperand(1));
1364  } else {
1365  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1366  }
1367 
1368  if (isThumb1 || !MI->getOperand(0).isDead()) {
1369  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1370  : isThumb1 ? ARM::tSTMIA_UPD
1371  : ARM::STMIA_UPD))
1372  .add(MI->getOperand(0));
1373  } else {
1374  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1375  }
1376 
1377  LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL));
1378  STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL));
1379 
1380  // Sort the scratch registers into ascending order.
1381  const TargetRegisterInfo &TRI = getRegisterInfo();
1382  SmallVector<unsigned, 6> ScratchRegs;
1383  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1384  ScratchRegs.push_back(MI->getOperand(I).getReg());
1385  std::sort(ScratchRegs.begin(), ScratchRegs.end(),
1386  [&TRI](const unsigned &Reg1,
1387  const unsigned &Reg2) -> bool {
1388  return TRI.getEncodingValue(Reg1) <
1389  TRI.getEncodingValue(Reg2);
1390  });
1391 
1392  for (const auto &Reg : ScratchRegs) {
1393  LDM.addReg(Reg, RegState::Define);
1394  STM.addReg(Reg, RegState::Kill);
1395  }
1396 
1397  BB->erase(MI);
1398 }
1399 
1400 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1401  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1402  assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
1403  "LOAD_STACK_GUARD currently supported only for MachO.");
1404  expandLoadStackGuard(MI);
1405  MI.getParent()->erase(MI);
1406  return true;
1407  }
1408 
1409  if (MI.getOpcode() == ARM::MEMCPY) {
1410  expandMEMCPY(MI);
1411  return true;
1412  }
1413 
1414  // This hook gets to expand COPY instructions before they become
1415  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1416  // widened to VMOVD. We prefer the VMOVD when possible because it may be
1417  // changed into a VORR that can go down the NEON pipeline.
1418  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
1419  return false;
1420 
1421  // Look for a copy between even S-registers. That is where we keep floats
1422  // when using NEON v2f32 instructions for f32 arithmetic.
1423  unsigned DstRegS = MI.getOperand(0).getReg();
1424  unsigned SrcRegS = MI.getOperand(1).getReg();
1425  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1426  return false;
1427 
1428  const TargetRegisterInfo *TRI = &getRegisterInfo();
1429  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1430  &ARM::DPRRegClass);
1431  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1432  &ARM::DPRRegClass);
1433  if (!DstRegD || !SrcRegD)
1434  return false;
1435 
1436  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1437  // legal if the COPY already defines the full DstRegD, and it isn't a
1438  // sub-register insertion.
1439  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1440  return false;
1441 
1442  // A dead copy shouldn't show up here, but reject it just in case.
1443  if (MI.getOperand(0).isDead())
1444  return false;
1445 
1446  // All clear, widen the COPY.
1447  DEBUG(dbgs() << "widening: " << MI);
1448  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1449 
1450  // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
1451  // or some other super-register.
1452  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1453  if (ImpDefIdx != -1)
1454  MI.RemoveOperand(ImpDefIdx);
1455 
1456  // Change the opcode and operands.
1457  MI.setDesc(get(ARM::VMOVD));
1458  MI.getOperand(0).setReg(DstRegD);
1459  MI.getOperand(1).setReg(SrcRegD);
1460  MIB.add(predOps(ARMCC::AL));
1461 
1462  // We are now reading SrcRegD instead of SrcRegS. This may upset the
1463  // register scavenger and machine verifier, so we need to indicate that we
1464  // are reading an undefined value from SrcRegD, but a proper value from
1465  // SrcRegS.
1466  MI.getOperand(1).setIsUndef();
1467  MIB.addReg(SrcRegS, RegState::Implicit);
1468 
1469  // SrcRegD may actually contain an unrelated value in the ssub_1
1470  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1471  if (MI.getOperand(1).isKill()) {
1472  MI.getOperand(1).setIsKill(false);
1473  MI.addRegisterKilled(SrcRegS, TRI, true);
1474  }
1475 
1476  DEBUG(dbgs() << "replaced by: " << MI);
1477  return true;
1478 }
1479 
1480 /// Create a copy of a const pool value. Update CPI to the new index and return
1481 /// the label UID.
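/// A fresh copy is needed because machine constant-pool values embed a
/// per-use PIC label id: cloning the load requires a new label UID and a new
/// constant-pool entry that refers to it.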
1482 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1483  MachineConstantPool *MCP = MF.getConstantPool();
1484  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1485 
1486  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1487  assert(MCPE.isMachineConstantPoolEntry() &&
1488  "Expecting a machine constantpool entry!");
1489  ARMConstantPoolValue *ACPV =
1490  static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1491 
1492  unsigned PCLabelId = AFI->createPICLabelUId();
1493  ARMConstantPoolValue *NewCPV = nullptr;
1494 
1495  // FIXME: The below assumes PIC relocation model and that the function
1496  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1497  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1498  // instructions, so that's probably OK, but is PIC always correct when
1499  // we get here?
1500  if (ACPV->isGlobalValue())
1501  NewCPV = ARMConstantPoolConstant::Create(
1502  cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1503  4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1504  else if (ACPV->isExtSymbol())
1505  NewCPV = ARMConstantPoolSymbol::
1506  Create(MF.getFunction()->getContext(),
1507  cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1508  else if (ACPV->isBlockAddress())
1509  NewCPV = ARMConstantPoolConstant::
1510  Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1511  ARMCP::CPBlockAddress, 4);
1512  else if (ACPV->isLSDA())
1513  NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
1514  ARMCP::CPLSDA, 4);
1515  else if (ACPV->isMachineBasicBlock())
1516  NewCPV = ARMConstantPoolMBB::
1517  Create(MF.getFunction()->getContext(),
1518  cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1519  else
1520  llvm_unreachable("Unexpected ARM constantpool value type!!");
1521  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
1522  return PCLabelId;
1523 }
1524 
1525 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1526  MachineBasicBlock::iterator I,
1527  unsigned DestReg, unsigned SubIdx,
1528  const MachineInstr &Orig,
1529  const TargetRegisterInfo &TRI) const {
1530  unsigned Opcode = Orig.getOpcode();
1531  switch (Opcode) {
1532  default: {
1533  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1534  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1535  MBB.insert(I, MI);
1536  break;
1537  }
1538  case ARM::tLDRpci_pic:
1539  case ARM::t2LDRpci_pic: {
1540  MachineFunction &MF = *MBB.getParent();
1541  unsigned CPI = Orig.getOperand(1).getIndex();
1542  unsigned PCLabelId = duplicateCPV(MF, CPI);
1543  MachineInstrBuilder MIB =
1544  BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1545  .addConstantPoolIndex(CPI)
1546  .addImm(PCLabelId);
1547  MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
1548  break;
1549  }
1550  }
1551 }
1552 
1553 MachineInstr &
1554 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1555  MachineBasicBlock::iterator InsertBefore,
1556  const MachineInstr &Orig) const {
1557  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1558  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1559  for (;;) {
1560  switch (I->getOpcode()) {
1561  case ARM::tLDRpci_pic:
1562  case ARM::t2LDRpci_pic: {
1563  MachineFunction &MF = *MBB.getParent();
1564  unsigned CPI = I->getOperand(1).getIndex();
1565  unsigned PCLabelId = duplicateCPV(MF, CPI);
1566  I->getOperand(1).setIndex(CPI);
1567  I->getOperand(2).setImm(PCLabelId);
1568  break;
1569  }
1570  }
1571  if (!I->isBundledWithSucc())
1572  break;
1573  ++I;
1574  }
1575  return Cloned;
1576 }
1577 
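// produceSameValue treats two PC-relative constant-pool loads (or
// global-address materialization pseudos) as equal when they load the same
// global or the same constant-pool contents, ignoring the PC label ids.
// For PICLDR it additionally compares the defining instructions of the
// address operands (assuming SSA form).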
1578 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1579  const MachineInstr &MI1,
1580  const MachineRegisterInfo *MRI) const {
1581  unsigned Opcode = MI0.getOpcode();
1582  if (Opcode == ARM::t2LDRpci ||
1583  Opcode == ARM::t2LDRpci_pic ||
1584  Opcode == ARM::tLDRpci ||
1585  Opcode == ARM::tLDRpci_pic ||
1586  Opcode == ARM::LDRLIT_ga_pcrel ||
1587  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1588  Opcode == ARM::tLDRLIT_ga_pcrel ||
1589  Opcode == ARM::MOV_ga_pcrel ||
1590  Opcode == ARM::MOV_ga_pcrel_ldr ||
1591  Opcode == ARM::t2MOV_ga_pcrel) {
1592  if (MI1.getOpcode() != Opcode)
1593  return false;
1594  if (MI0.getNumOperands() != MI1.getNumOperands())
1595  return false;
1596 
1597  const MachineOperand &MO0 = MI0.getOperand(1);
1598  const MachineOperand &MO1 = MI1.getOperand(1);
1599  if (MO0.getOffset() != MO1.getOffset())
1600  return false;
1601 
1602  if (Opcode == ARM::LDRLIT_ga_pcrel ||
1603  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1604  Opcode == ARM::tLDRLIT_ga_pcrel ||
1605  Opcode == ARM::MOV_ga_pcrel ||
1606  Opcode == ARM::MOV_ga_pcrel_ldr ||
1607  Opcode == ARM::t2MOV_ga_pcrel)
1608  // Ignore the PC labels.
1609  return MO0.getGlobal() == MO1.getGlobal();
1610 
1611  const MachineFunction *MF = MI0.getParent()->getParent();
1612  const MachineConstantPool *MCP = MF->getConstantPool();
1613  int CPI0 = MO0.getIndex();
1614  int CPI1 = MO1.getIndex();
1615  const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1616  const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1617  bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1618  bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1619  if (isARMCP0 && isARMCP1) {
1620  ARMConstantPoolValue *ACPV0 =
1621  static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1622  ARMConstantPoolValue *ACPV1 =
1623  static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1624  return ACPV0->hasSameValue(ACPV1);
1625  } else if (!isARMCP0 && !isARMCP1) {
1626  return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1627  }
1628  return false;
1629  } else if (Opcode == ARM::PICLDR) {
1630  if (MI1.getOpcode() != Opcode)
1631  return false;
1632  if (MI0.getNumOperands() != MI1.getNumOperands())
1633  return false;
1634 
1635  unsigned Addr0 = MI0.getOperand(1).getReg();
1636  unsigned Addr1 = MI1.getOperand(1).getReg();
1637  if (Addr0 != Addr1) {
1638  if (!MRI ||
1639  !TargetRegisterInfo::isVirtualRegister(Addr0) ||
1640  !TargetRegisterInfo::isVirtualRegister(Addr1))
1641  return false;
1642 
1643  // This assumes SSA form.
1644  MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1645  MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1646  // Check if the loaded value, e.g. a constantpool of a global address, are
1647  // the same.
1648  if (!produceSameValue(*Def0, *Def1, MRI))
1649  return false;
1650  }
1651 
1652  for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1653  // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
1654  const MachineOperand &MO0 = MI0.getOperand(i);
1655  const MachineOperand &MO1 = MI1.getOperand(i);
1656  if (!MO0.isIdenticalTo(MO1))
1657  return false;
1658  }
1659  return true;
1660  }
1661 
1662  return MI0.isIdenticalTo(MI1, MachineRegisterInfo::IgnoreVRegDefs);
1663 }
1664 
1665 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1666 /// determine if two loads are loading from the same base address. It should
1667 /// only return true if the base pointers are the same and the only differences
1668 /// between the two addresses is the offset. It also returns the offsets by
1669 /// reference.
1670 ///
1671 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1672 /// is permanently disabled.
1673 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1674  int64_t &Offset1,
1675  int64_t &Offset2) const {
1676  // Don't worry about Thumb: just ARM and Thumb2.
1677  if (Subtarget.isThumb1Only()) return false;
1678 
1679  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1680  return false;
1681 
1682  switch (Load1->getMachineOpcode()) {
1683  default:
1684  return false;
1685  case ARM::LDRi12:
1686  case ARM::LDRBi12:
1687  case ARM::LDRD:
1688  case ARM::LDRH:
1689  case ARM::LDRSB:
1690  case ARM::LDRSH:
1691  case ARM::VLDRD:
1692  case ARM::VLDRS:
1693  case ARM::t2LDRi8:
1694  case ARM::t2LDRBi8:
1695  case ARM::t2LDRDi8:
1696  case ARM::t2LDRSHi8:
1697  case ARM::t2LDRi12:
1698  case ARM::t2LDRBi12:
1699  case ARM::t2LDRSHi12:
1700  break;
1701  }
1702 
1703  switch (Load2->getMachineOpcode()) {
1704  default:
1705  return false;
1706  case ARM::LDRi12:
1707  case ARM::LDRBi12:
1708  case ARM::LDRD:
1709  case ARM::LDRH:
1710  case ARM::LDRSB:
1711  case ARM::LDRSH:
1712  case ARM::VLDRD:
1713  case ARM::VLDRS:
1714  case ARM::t2LDRi8:
1715  case ARM::t2LDRBi8:
1716  case ARM::t2LDRSHi8:
1717  case ARM::t2LDRi12:
1718  case ARM::t2LDRBi12:
1719  case ARM::t2LDRSHi12:
1720  break;
1721  }
1722 
1723  // Check if base addresses and chain operands match.
1724  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1725  Load1->getOperand(4) != Load2->getOperand(4))
1726  return false;
1727 
1728  // Index should be Reg0.
1729  if (Load1->getOperand(3) != Load2->getOperand(3))
1730  return false;
1731 
1732  // Determine the offsets.
1733  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1734  isa<ConstantSDNode>(Load2->getOperand(1))) {
1735  Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1736  Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1737  return true;
1738  }
1739 
1740  return false;
1741 }
1742 
1743 /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
1744 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1745 /// be scheduled together. On some targets if two loads are loading from
1746 /// addresses in the same cache line, it's better if they are scheduled
1747 /// together. This function takes two integers that represent the load offsets
1748 /// from the common base address. It returns true if it decides it's desirable
1749 /// to schedule the two loads together. "NumLoads" is the number of loads that
1750 /// have already been scheduled after Load1.
1751 ///
1752 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1753 /// is permanently disabled.
1754 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1755  int64_t Offset1, int64_t Offset2,
1756  unsigned NumLoads) const {
1757  // Don't worry about Thumb: just ARM and Thumb2.
1758  if (Subtarget.isThumb1Only()) return false;
1759 
1760  assert(Offset2 > Offset1);
1761 
1762  if ((Offset2 - Offset1) / 8 > 64)
1763  return false;
1764 
1765  // Check if the machine opcodes are different. If they are different
1766  // then we consider them to not be of the same base address,
1767  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
1768  // In this case, they are considered to be the same because they are different
1769  // encoding forms of the same basic instruction.
1770  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1771  !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1772  Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1773  (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1774  Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1775  return false; // FIXME: overly conservative?
1776 
1777  // Four loads in a row should be sufficient.
1778  if (NumLoads >= 3)
1779  return false;
1780 
1781  return true;
1782 }
1783 
1784 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1785  const MachineBasicBlock *MBB,
1786  const MachineFunction &MF) const {
1787  // Debug info is never a scheduling boundary. It's necessary to be explicit
1788  // due to the special treatment of IT instructions below; otherwise, a
1789  // dbg_value followed by an IT will result in the IT instruction being
1790  // considered a scheduling hazard, which is wrong. It should be the actual
1791  // instruction preceding the dbg_value instruction(s), just like it is
1792  // when debug info is not present.
1793  if (MI.isDebugValue())
1794  return false;
1795 
1796  // Terminators and labels can't be scheduled around.
1797  if (MI.isTerminator() || MI.isPosition())
1798  return true;
1799 
1800  // Treat the start of the IT block as a scheduling boundary, but schedule
1801  // t2IT along with all instructions following it.
1802  // FIXME: This is a big hammer. But the alternative is to add all potential
1803  // true and anti dependencies to IT block instructions as implicit operands
1804  // to the t2IT instruction. The added compile time and complexity does not
1805  // seem worth it.
1806  MachineBasicBlock::const_iterator I = MI;
1807  // Make sure to skip any dbg_value instructions
1808  while (++I != MBB->end() && I->isDebugValue())
1809  ;
1810  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1811  return true;
1812 
1813  // Don't attempt to schedule around any instruction that defines
1814  // a stack-oriented pointer, as it's unlikely to be profitable. This
1815  // saves compile time, because it doesn't require every single
1816  // stack slot reference to depend on the instruction that does the
1817  // modification.
1818  // Calls don't actually change the stack pointer, even if they have imp-defs.
1819  // No ARM calling conventions change the stack pointer. (X86 calling
1820  // conventions sometimes do).
1821  if (!MI.isCall() && MI.definesRegister(ARM::SP))
1822  return true;
1823 
1824  return false;
1825 }
1826 
1827 bool ARMBaseInstrInfo::
1828 isProfitableToIfCvt(MachineBasicBlock &MBB,
1829  unsigned NumCycles, unsigned ExtraPredCycles,
1830  BranchProbability Probability) const {
1831  if (!NumCycles)
1832  return false;
1833 
1834  // If we are optimizing for size, see if the branch in the predecessor can be
1835  // lowered to cbn?z by the constant island lowering pass, and return false if
1836  // so. This results in a shorter instruction sequence.
1837  if (MBB.getParent()->getFunction()->optForSize()) {
1838  MachineBasicBlock *Pred = *MBB.pred_begin();
1839  if (!Pred->empty()) {
1840  MachineInstr *LastMI = &*Pred->rbegin();
1841  if (LastMI->getOpcode() == ARM::t2Bcc) {
1842  MachineBasicBlock::iterator CmpMI = LastMI;
1843  if (CmpMI != Pred->begin()) {
1844  --CmpMI;
1845  if (CmpMI->getOpcode() == ARM::tCMPi8 ||
1846  CmpMI->getOpcode() == ARM::t2CMPri) {
1847  unsigned Reg = CmpMI->getOperand(0).getReg();
1848  unsigned PredReg = 0;
1849  ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
1850  if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
1851  isARMLowRegister(Reg))
1852  return false;
1853  }
1854  }
1855  }
1856  }
1857  }
1858  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1859  MBB, 0, 0, Probability);
1860 }
1861 
1862 bool ARMBaseInstrInfo::
1863 isProfitableToIfCvt(MachineBasicBlock &TBB,
1864  unsigned TCycles, unsigned TExtra,
1865  MachineBasicBlock &FBB,
1866  unsigned FCycles, unsigned FExtra,
1867  BranchProbability Probability) const {
1868  if (!TCycles)
1869  return false;
1870 
1871  // Attempt to estimate the relative costs of predication versus branching.
1872  // Here we scale up each component of UnpredCost to avoid precision issues when
1873  // scaling TCycles/FCycles by Probability.
1874  const unsigned ScalingUpFactor = 1024;
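  // Illustrative arithmetic (hypothetical numbers, not from any subtarget):
  // with TCycles = 2, FCycles = 2, TExtra = FExtra = 0 and a 50% probability
  // on a core with a branch predictor, PredCost = (2 + 2) * 1024 = 4096 and
  // UnpredCost = 1024 + 1024 + 1024 (the branch itself) + penalty * 1024 / 10,
  // so predication is chosen once the scaled misprediction term reaches ~1024.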
1875 
1876  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1877  unsigned UnpredCost;
1878  if (!Subtarget.hasBranchPredictor()) {
1879  // When we don't have a branch predictor it's always cheaper to not take a
1880  // branch than take it, so we have to take that into account.
1881  unsigned NotTakenBranchCost = 1;
1882  unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1883  unsigned TUnpredCycles, FUnpredCycles;
1884  if (!FCycles) {
1885  // Triangle: TBB is the fallthrough
1886  TUnpredCycles = TCycles + NotTakenBranchCost;
1887  FUnpredCycles = TakenBranchCost;
1888  } else {
1889  // Diamond: TBB is the block that is branched to, FBB is the fallthrough
1890  TUnpredCycles = TCycles + TakenBranchCost;
1891  FUnpredCycles = FCycles + NotTakenBranchCost;
1892  // The branch at the end of FBB will disappear when it's predicated, so
1893  // discount it from PredCost.
1894  PredCost -= 1 * ScalingUpFactor;
1895  }
1896  // The total cost is the cost of each path scaled by their probabilities.
1897  unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
1898  unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
1899  UnpredCost = TUnpredCost + FUnpredCost;
1900  // When predicating, assume that the first IT can be folded away but later
1901  // ones cost one cycle each.
1902  if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
1903  PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
1904  }
1905  } else {
1906  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
1907  unsigned FUnpredCost =
1908  Probability.getCompl().scale(FCycles * ScalingUpFactor);
1909  UnpredCost = TUnpredCost + FUnpredCost;
1910  UnpredCost += 1 * ScalingUpFactor; // The branch itself
1911  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
1912  }
1913 
1914  return PredCost <= UnpredCost;
1915 }
1916 
1917 bool
1918 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1919  MachineBasicBlock &FMBB) const {
1920  // Reduce false anti-dependencies to let the target's out-of-order execution
1921  // engine do its thing.
1922  return Subtarget.isProfitableToUnpredicate();
1923 }
1924 
1925 /// getInstrPredicate - If instruction is predicated, returns its predicate
1926 /// condition, otherwise returns AL. It also returns the condition code
1927 /// register by reference.
1928 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
1929  unsigned &PredReg) {
1930  int PIdx = MI.findFirstPredOperandIdx();
1931  if (PIdx == -1) {
1932  PredReg = 0;
1933  return ARMCC::AL;
1934  }
1935 
1936  PredReg = MI.getOperand(PIdx+1).getReg();
1937  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
1938 }
1939 
1940 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
1941  if (Opc == ARM::B)
1942  return ARM::Bcc;
1943  if (Opc == ARM::tB)
1944  return ARM::tBcc;
1945  if (Opc == ARM::t2B)
1946  return ARM::t2Bcc;
1947 
1948  llvm_unreachable("Unknown unconditional branch opcode!");
1949 }
1950 
1951 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
1952  bool NewMI,
1953  unsigned OpIdx1,
1954  unsigned OpIdx2) const {
1955  switch (MI.getOpcode()) {
1956  case ARM::MOVCCr:
1957  case ARM::t2MOVCCr: {
1958  // MOVCC can be commuted by inverting the condition.
1959  unsigned PredReg = 0;
1960  ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
1961  // MOVCC AL can't be inverted. Shouldn't happen.
1962  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
1963  return nullptr;
1964  MachineInstr *CommutedMI =
1965  TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1966  if (!CommutedMI)
1967  return nullptr;
1968  // After swapping the MOVCC operands, also invert the condition.
1969  CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
1970  .setImm(ARMCC::getOppositeCondition(CC));
1971  return CommutedMI;
1972  }
1973  }
1974  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1975 }
1976 
1977 /// Identify instructions that can be folded into a MOVCC instruction, and
1978 /// return the defining instruction.
1979 static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
1980  const MachineRegisterInfo &MRI,
1981  const TargetInstrInfo *TII) {
1982  if (!TargetRegisterInfo::isVirtualRegister(Reg))
1983  return nullptr;
1984  if (!MRI.hasOneNonDBGUse(Reg))
1985  return nullptr;
1986  MachineInstr *MI = MRI.getVRegDef(Reg);
1987  if (!MI)
1988  return nullptr;
1989  // MI is folded into the MOVCC by predicating it.
1990  if (!MI->isPredicable())
1991  return nullptr;
1992  // Check if MI has any non-dead defs or physreg uses. This also detects
1993  // predicated instructions which will be reading CPSR.
1994  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
1995  const MachineOperand &MO = MI->getOperand(i);
1996  // Reject frame index operands, PEI can't handle the predicated pseudos.
1997  if (MO.isFI() || MO.isCPI() || MO.isJTI())
1998  return nullptr;
1999  if (!MO.isReg())
2000  continue;
2001  // MI can't have any tied operands, that would conflict with predication.
2002  if (MO.isTied())
2003  return nullptr;
2004  if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
2005  return nullptr;
2006  if (MO.isDef() && !MO.isDead())
2007  return nullptr;
2008  }
2009  bool DontMoveAcrossStores = true;
2010  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2011  return nullptr;
2012  return MI;
2013 }
2014 
2015 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2016  SmallVectorImpl<MachineOperand> &Cond,
2017  unsigned &TrueOp, unsigned &FalseOp,
2018  bool &Optimizable) const {
2019  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2020  "Unknown select instruction");
2021  // MOVCC operands:
2022  // 0: Def.
2023  // 1: True use.
2024  // 2: False use.
2025  // 3: Condition code.
2026  // 4: CPSR use.
2027  TrueOp = 1;
2028  FalseOp = 2;
2029  Cond.push_back(MI.getOperand(3));
2030  Cond.push_back(MI.getOperand(4));
2031  // We can always fold a def.
2032  Optimizable = true;
2033  return false;
2034 }
2035 
2036 MachineInstr *
2037 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2038  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2039  bool PreferFalse) const {
2040  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2041  "Unknown select instruction");
2042  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2043  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2044  bool Invert = !DefMI;
2045  if (!DefMI)
2046  DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2047  if (!DefMI)
2048  return nullptr;
2049 
2050  // Find new register class to use.
2051  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2052  unsigned DestReg = MI.getOperand(0).getReg();
2053  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
2054  if (!MRI.constrainRegClass(DestReg, PreviousClass))
2055  return nullptr;
2056 
2057  // Create a new predicated version of DefMI.
2058  // Rfalse is the first use.
2059  MachineInstrBuilder NewMI =
2060  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2061 
2062  // Copy all the DefMI operands, excluding its (null) predicate.
2063  const MCInstrDesc &DefDesc = DefMI->getDesc();
2064  for (unsigned i = 1, e = DefDesc.getNumOperands();
2065  i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2066  NewMI.add(DefMI->getOperand(i));
2067 
2068  unsigned CondCode = MI.getOperand(3).getImm();
2069  if (Invert)
2070  NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2071  else
2072  NewMI.addImm(CondCode);
2073  NewMI.add(MI.getOperand(4));
2074 
2075  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2076  if (NewMI->hasOptionalDef())
2077  NewMI.add(condCodeOp());
2078 
2079  // The output register value when the predicate is false is an implicit
2080  // register operand tied to the first def.
2081  // The tie makes the register allocator ensure the FalseReg is allocated the
2082  // same register as operand 0.
2083  FalseReg.setImplicit();
2084  NewMI.add(FalseReg);
2085  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2086 
2087  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2088  SeenMIs.insert(NewMI);
2089  SeenMIs.erase(DefMI);
2090 
2091  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2092  // DefMI would be invalid when transferred inside the loop. Checking for a
2093  // loop is expensive, but at least remove kill flags if they are in different
2094  // BBs.
2095  if (DefMI->getParent() != MI.getParent())
2096  NewMI->clearKillInfo();
2097 
2098  // The caller will erase MI, but not DefMI.
2099  DefMI->eraseFromParent();
2100  return NewMI;
2101 }
2102 
2103 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2104 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2105 /// def operand.
2106 ///
2107 /// This will go away once we can teach tblgen how to set the optional CPSR def
2108 /// operand itself.
2109 struct AddSubFlagsOpcodePair {
2110  uint16_t PseudoOpc;
2111  uint16_t MachineOpc;
2112 };
2113 
2114 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2115  {ARM::ADDSri, ARM::ADDri},
2116  {ARM::ADDSrr, ARM::ADDrr},
2117  {ARM::ADDSrsi, ARM::ADDrsi},
2118  {ARM::ADDSrsr, ARM::ADDrsr},
2119 
2120  {ARM::SUBSri, ARM::SUBri},
2121  {ARM::SUBSrr, ARM::SUBrr},
2122  {ARM::SUBSrsi, ARM::SUBrsi},
2123  {ARM::SUBSrsr, ARM::SUBrsr},
2124 
2125  {ARM::RSBSri, ARM::RSBri},
2126  {ARM::RSBSrsi, ARM::RSBrsi},
2127  {ARM::RSBSrsr, ARM::RSBrsr},
2128 
2129  {ARM::tADDSi3, ARM::tADDi3},
2130  {ARM::tADDSi8, ARM::tADDi8},
2131  {ARM::tADDSrr, ARM::tADDrr},
2132  {ARM::tADCS, ARM::tADC},
2133 
2134  {ARM::tSUBSi3, ARM::tSUBi3},
2135  {ARM::tSUBSi8, ARM::tSUBi8},
2136  {ARM::tSUBSrr, ARM::tSUBrr},
2137  {ARM::tSBCS, ARM::tSBC},
2138 
2139  {ARM::t2ADDSri, ARM::t2ADDri},
2140  {ARM::t2ADDSrr, ARM::t2ADDrr},
2141  {ARM::t2ADDSrs, ARM::t2ADDrs},
2142 
2143  {ARM::t2SUBSri, ARM::t2SUBri},
2144  {ARM::t2SUBSrr, ARM::t2SUBrr},
2145  {ARM::t2SUBSrs, ARM::t2SUBrs},
2146 
2147  {ARM::t2RSBSri, ARM::t2RSBri},
2148  {ARM::t2RSBSrs, ARM::t2RSBrs},
2149 };
2150 
2151 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2152  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2153  if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2154  return AddSubFlagsOpcodeMap[i].MachineOpc;
2155  return 0;
2156 }
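// For example, convertAddSubFlagsOpcode(ARM::ADDSri) returns ARM::ADDri per
// the table above, while an opcode with no entry (e.g. ARM::MOVr) returns 0
// so the caller knows no conversion applies.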
2157 
2158 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2159  MachineBasicBlock::iterator &MBBI,
2160  const DebugLoc &dl, unsigned DestReg,
2161  unsigned BaseReg, int NumBytes,
2162  ARMCC::CondCodes Pred, unsigned PredReg,
2163  const ARMBaseInstrInfo &TII,
2164  unsigned MIFlags) {
2165  if (NumBytes == 0 && DestReg != BaseReg) {
2166  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2167  .addReg(BaseReg, RegState::Kill)
2168  .add(predOps(Pred, PredReg))
2169  .add(condCodeOp())
2170  .setMIFlags(MIFlags);
2171  return;
2172  }
2173 
2174  bool isSub = NumBytes < 0;
2175  if (isSub) NumBytes = -NumBytes;
2176 
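  // Worked example (illustrative): NumBytes = 1025 (0x401) is not a valid ARM
  // modified immediate, so the loop below peels it into two valid so_imm
  // chunks and emits two instructions:
  //   add DestReg, BaseReg, #1
  //   add DestReg, DestReg, #1024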
2177  while (NumBytes) {
2178  unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2179  unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2180  assert(ThisVal && "Didn't extract field correctly");
2181 
2182  // We will handle these bits from offset, clear them.
2183  NumBytes &= ~ThisVal;
2184 
2185  assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2186 
2187  // Build the new ADD / SUB.
2188  unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2189  BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2190  .addReg(BaseReg, RegState::Kill)
2191  .addImm(ThisVal)
2192  .add(predOps(Pred, PredReg))
2193  .add(condCodeOp())
2194  .setMIFlags(MIFlags);
2195  BaseReg = DestReg;
2196  }
2197 }
2198 
2199 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2200  MachineFunction &MF, MachineInstr *MI,
2201  unsigned NumBytes) {
2202  // This optimisation potentially adds lots of load and store
2203  // micro-operations, so it's really only a benefit to code size.
2204  if (!MF.getFunction()->optForMinSize())
2205  return false;
2206 
2207  // If only one register is pushed/popped, LLVM can use an LDR/STR
2208  // instead. We can't modify those so make sure we're dealing with an
2209  // instruction we understand.
2210  bool IsPop = isPopOpcode(MI->getOpcode());
2211  bool IsPush = isPushOpcode(MI->getOpcode());
2212  if (!IsPush && !IsPop)
2213  return false;
2214 
2215  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2216  MI->getOpcode() == ARM::VLDMDIA_UPD;
2217  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2218  MI->getOpcode() == ARM::tPOP ||
2219  MI->getOpcode() == ARM::tPOP_RET;
2220 
2221  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2222  MI->getOperand(1).getReg() == ARM::SP)) &&
2223  "trying to fold sp update into non-sp-updating push/pop");
2224 
2225  // The VFP push & pop act on D-registers, so we can only correctly fold in
2226  // an adjustment that is a multiple of 8 bytes. Similarly, GPRs are 4 bytes.
2227  // Don't try if this is violated.
2228  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2229  return false;
2230 
2231  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2232  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2233  int RegListIdx = IsT1PushPop ? 2 : 4;
2234 
2235  // Calculate the space we'll need in terms of registers.
2236  unsigned RegsNeeded;
2237  const TargetRegisterClass *RegClass;
2238  if (IsVFPPushPop) {
2239  RegsNeeded = NumBytes / 8;
2240  RegClass = &ARM::DPRRegClass;
2241  } else {
2242  RegsNeeded = NumBytes / 4;
2243  RegClass = &ARM::GPRRegClass;
2244  }
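  // Illustrative example: folding an 8-byte SP adjustment into a Thumb1 POP
  // gives RegsNeeded = 2, i.e. two scratch GPRs must be appended (and marked
  // dead); folding the same 8 bytes into a VFP VLDMDIA_UPD needs only one
  // extra D-register.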
2245 
2246  // We're going to have to strip all list operands off before
2247  // re-adding them since the order matters, so save the existing ones
2248  // for later.
2249  SmallVector<MachineOperand, 4> RegList;
2250 
2251  // We're also going to need the first register transferred by this
2252  // instruction, which won't necessarily be the first register in the list.
2253  unsigned FirstRegEnc = -1;
2254 
2255  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2256  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2257  MachineOperand &MO = MI->getOperand(i);
2258  RegList.push_back(MO);
2259 
2260  if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2261  FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2262  }
2263 
2264  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2265 
2266  // Now try to find enough space in the reglist to allocate NumBytes.
2267  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2268  --CurRegEnc) {
2269  unsigned CurReg = RegClass->getRegister(CurRegEnc);
2270  if (!IsPop) {
2271  // Pushing any register is completely harmless; mark the
2272  // register involved as undef since we don't care about it in
2273  // the slightest.
2274  RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2275  false, false, true));
2276  --RegsNeeded;
2277  continue;
2278  }
2279 
2280  // However, we can only pop an extra register if it's not live. For
2281  // registers live within the function we might clobber a return value
2282  // register; the other way a register can be live here is if it's
2283  // callee-saved.
2284  if (isCalleeSavedRegister(CurReg, CSRegs) ||
2285  MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2286  MachineBasicBlock::LQR_Dead) {
2287  // VFP pops don't allow holes in the register list, so any skip is fatal
2288  // for our transformation. GPR pops do, so we should just keep looking.
2289  if (IsVFPPushPop)
2290  return false;
2291  else
2292  continue;
2293  }
2294 
2295  // Mark the unimportant registers as <def,dead> in the POP.
2296  RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2297  true));
2298  --RegsNeeded;
2299  }
2300 
2301  if (RegsNeeded > 0)
2302  return false;
2303 
2304  // Finally we know we can profitably perform the optimisation so go
2305  // ahead: strip all existing registers off and add them back again
2306  // in the right order.
2307  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2308  MI->RemoveOperand(i);
2309 
2310  // Add the complete list back in.
2311  MachineInstrBuilder MIB(MF, &*MI);
2312  for (int i = RegList.size() - 1; i >= 0; --i)
2313  MIB.add(RegList[i]);
2314 
2315  return true;
2316 }
2317 
2318 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2319  unsigned FrameReg, int &Offset,
2320  const ARMBaseInstrInfo &TII) {
2321  unsigned Opcode = MI.getOpcode();
2322  const MCInstrDesc &Desc = MI.getDesc();
2323  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2324  bool isSub = false;
2325 
2326  // Memory operands in inline assembly always use AddrMode2.
2327  if (Opcode == ARM::INLINEASM)
2328  AddrMode = ARMII::AddrMode2;
2329 
2330  if (Opcode == ARM::ADDri) {
2331  Offset += MI.getOperand(FrameRegIdx+1).getImm();
2332  if (Offset == 0) {
2333  // Turn it into a move.
2334  MI.setDesc(TII.get(ARM::MOVr));
2335  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2336  MI.RemoveOperand(FrameRegIdx+1);
2337  Offset = 0;
2338  return true;
2339  } else if (Offset < 0) {
2340  Offset = -Offset;
2341  isSub = true;
2342  MI.setDesc(TII.get(ARM::SUBri));
2343  }
2344 
2345  // Common case: small offset, fits into instruction.
2346  if (ARM_AM::getSOImmVal(Offset) != -1) {
2347  // Replace the FrameIndex with sp / fp
2348  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2349  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2350  Offset = 0;
2351  return true;
2352  }
2353 
2354  // Otherwise, pull as much of the immediate into this ADDri/SUBri
2355  // as possible.
2356  unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2357  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2358 
2359  // We will handle these bits from offset, clear them.
2360  Offset &= ~ThisImmVal;
2361 
2362  // Get the properly encoded SOImmVal field.
2363  assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2364  "Bit extraction didn't work?");
2365  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2366  } else {
2367  unsigned ImmIdx = 0;
2368  int InstrOffs = 0;
2369  unsigned NumBits = 0;
2370  unsigned Scale = 1;
2371  switch (AddrMode) {
2372  case ARMII::AddrMode_i12:
2373  ImmIdx = FrameRegIdx + 1;
2374  InstrOffs = MI.getOperand(ImmIdx).getImm();
2375  NumBits = 12;
2376  break;
2377  case ARMII::AddrMode2:
2378  ImmIdx = FrameRegIdx+2;
2379  InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2380  if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2381  InstrOffs *= -1;
2382  NumBits = 12;
2383  break;
2384  case ARMII::AddrMode3:
2385  ImmIdx = FrameRegIdx+2;
2386  InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2387  if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2388  InstrOffs *= -1;
2389  NumBits = 8;
2390  break;
2391  case ARMII::AddrMode4:
2392  case ARMII::AddrMode6:
2393  // Can't fold any offset even if it's zero.
2394  return false;
2395  case ARMII::AddrMode5:
2396  ImmIdx = FrameRegIdx+1;
2397  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2398  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2399  InstrOffs *= -1;
2400  NumBits = 8;
2401  Scale = 4;
2402  break;
2403  default:
2404  llvm_unreachable("Unsupported addressing mode!");
2405  }
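  // For instance, AddrMode5 (VLDRD / VLDRS) has an 8-bit offset scaled by 4,
  // so the fold below can only absorb frame offsets in [-1020, 1020] that are
  // a multiple of 4.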
2406 
2407  Offset += InstrOffs * Scale;
2408  assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2409  if (Offset < 0) {
2410  Offset = -Offset;
2411  isSub = true;
2412  }
2413 
2414  // Attempt to fold address comp. if opcode has offset bits
2415  if (NumBits > 0) {
2416  // Common case: small offset, fits into instruction.
2417  MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2418  int ImmedOffset = Offset / Scale;
2419  unsigned Mask = (1 << NumBits) - 1;
2420  if ((unsigned)Offset <= Mask * Scale) {
2421  // Replace the FrameIndex with sp
2422  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2423  // FIXME: When addrmode2 goes away, this will simplify (like the
2424  // T2 version), as the LDR.i12 versions don't need the encoding
2425  // tricks for the offset value.
2426  if (isSub) {
2427  if (AddrMode == ARMII::AddrMode_i12)
2428  ImmedOffset = -ImmedOffset;
2429  else
2430  ImmedOffset |= 1 << NumBits;
2431  }
2432  ImmOp.ChangeToImmediate(ImmedOffset);
2433  Offset = 0;
2434  return true;
2435  }
2436 
2437  // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2438  ImmedOffset = ImmedOffset & Mask;
2439  if (isSub) {
2440  if (AddrMode == ARMII::AddrMode_i12)
2441  ImmedOffset = -ImmedOffset;
2442  else
2443  ImmedOffset |= 1 << NumBits;
2444  }
2445  ImmOp.ChangeToImmediate(ImmedOffset);
2446  Offset &= ~(Mask*Scale);
2447  }
2448  }
2449 
2450  Offset = (isSub) ? -Offset : Offset;
2451  return Offset == 0;
2452 }
2453 
2454 /// analyzeCompare - For a comparison instruction, return the source registers
2455 /// in SrcReg and SrcReg2 if having two register operands, and the value it
2456 /// compares against in CmpValue. Return true if the comparison instruction
2457 /// can be analyzed.
2458 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
2459  unsigned &SrcReg2, int &CmpMask,
2460  int &CmpValue) const {
2461  switch (MI.getOpcode()) {
2462  default: break;
2463  case ARM::CMPri:
2464  case ARM::t2CMPri:
2465  case ARM::tCMPi8:
2466  SrcReg = MI.getOperand(0).getReg();
2467  SrcReg2 = 0;
2468  CmpMask = ~0;
2469  CmpValue = MI.getOperand(1).getImm();
2470  return true;
2471  case ARM::CMPrr:
2472  case ARM::t2CMPrr:
2473  SrcReg = MI.getOperand(0).getReg();
2474  SrcReg2 = MI.getOperand(1).getReg();
2475  CmpMask = ~0;
2476  CmpValue = 0;
2477  return true;
2478  case ARM::TSTri:
2479  case ARM::t2TSTri:
2480  SrcReg = MI.getOperand(0).getReg();
2481  SrcReg2 = 0;
2482  CmpMask = MI.getOperand(1).getImm();
2483  CmpValue = 0;
2484  return true;
2485  }
2486 
2487  return false;
2488 }
2489 
2490 /// isSuitableForMask - Identify a suitable 'and' instruction that
2491 /// operates on the given source register and applies the same mask
2492 /// as a 'tst' instruction. Provide a limited look-through for copies.
2493 /// When successful, MI will hold the found instruction.
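/// For example, a "TSTri %r0, 255" can be satisfied by an "ANDri %rX, %r0, 255"
/// found among the uses of %r0 (CommonUse selects whether the AND's destination
/// or its first source operand has to match SrcReg).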
2494 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2495  int CmpMask, bool CommonUse) {
2496  switch (MI->getOpcode()) {
2497  case ARM::ANDri:
2498  case ARM::t2ANDri:
2499  if (CmpMask != MI->getOperand(2).getImm())
2500  return false;
2501  if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2502  return true;
2503  break;
2504  }
2505 
2506  return false;
2507 }
2508 
2509 /// getSwappedCondition - assume the flags are set by MI(a,b), return
2510 /// the condition code if we modify the instructions such that flags are
2511 /// set by MI(b,a).
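/// For example, a user that branches on HS (unsigned a >= b) after CMP(a,b)
/// must branch on LS once the flags are instead produced by SUB(b,a).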
2512 inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
2513  switch (CC) {
2514  default: return ARMCC::AL;
2515  case ARMCC::EQ: return ARMCC::EQ;
2516  case ARMCC::NE: return ARMCC::NE;
2517  case ARMCC::HS: return ARMCC::LS;
2518  case ARMCC::LO: return ARMCC::HI;
2519  case ARMCC::HI: return ARMCC::LO;
2520  case ARMCC::LS: return ARMCC::HS;
2521  case ARMCC::GE: return ARMCC::LE;
2522  case ARMCC::LT: return ARMCC::GT;
2523  case ARMCC::GT: return ARMCC::LT;
2524  case ARMCC::LE: return ARMCC::GE;
2525  }
2526 }
2527 
2528 /// isRedundantFlagInstr - check whether the first instruction, whose only
2529 /// purpose is to update flags, can be made redundant.
2530 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2531 /// CMPri can be made redundant by SUBri if the operands are the same.
2532 /// This function can be extended later on.
2533 inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
2534  unsigned SrcReg2, int ImmValue,
2535  MachineInstr *OI) {
2536  if ((CmpI->getOpcode() == ARM::CMPrr ||
2537  CmpI->getOpcode() == ARM::t2CMPrr) &&
2538  (OI->getOpcode() == ARM::SUBrr ||
2539  OI->getOpcode() == ARM::t2SUBrr) &&
2540  ((OI->getOperand(1).getReg() == SrcReg &&
2541  OI->getOperand(2).getReg() == SrcReg2) ||
2542  (OI->getOperand(1).getReg() == SrcReg2 &&
2543  OI->getOperand(2).getReg() == SrcReg)))
2544  return true;
2545 
2546  if ((CmpI->getOpcode() == ARM::CMPri ||
2547  CmpI->getOpcode() == ARM::t2CMPri) &&
2548  (OI->getOpcode() == ARM::SUBri ||
2549  OI->getOpcode() == ARM::t2SUBri) &&
2550  OI->getOperand(1).getReg() == SrcReg &&
2551  OI->getOperand(2).getImm() == ImmValue)
2552  return true;
2553  return false;
2554 }
2555 
2556 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2557  switch (MI->getOpcode()) {
2558  default: return false;
2559  case ARM::tLSLri:
2560  case ARM::tLSRri:
2561  case ARM::tLSLrr:
2562  case ARM::tLSRrr:
2563  case ARM::tSUBrr:
2564  case ARM::tADDrr:
2565  case ARM::tADDi3:
2566  case ARM::tADDi8:
2567  case ARM::tSUBi3:
2568  case ARM::tSUBi8:
2569  case ARM::tMUL:
2570  IsThumb1 = true;
2571  LLVM_FALLTHROUGH;
2572  case ARM::RSBrr:
2573  case ARM::RSBri:
2574  case ARM::RSCrr:
2575  case ARM::RSCri:
2576  case ARM::ADDrr:
2577  case ARM::ADDri:
2578  case ARM::ADCrr:
2579  case ARM::ADCri:
2580  case ARM::SUBrr:
2581  case ARM::SUBri:
2582  case ARM::SBCrr:
2583  case ARM::SBCri:
2584  case ARM::t2RSBri:
2585  case ARM::t2ADDrr:
2586  case ARM::t2ADDri:
2587  case ARM::t2ADCrr:
2588  case ARM::t2ADCri:
2589  case ARM::t2SUBrr:
2590  case ARM::t2SUBri:
2591  case ARM::t2SBCrr:
2592  case ARM::t2SBCri:
2593  case ARM::ANDrr:
2594  case ARM::ANDri:
2595  case ARM::t2ANDrr:
2596  case ARM::t2ANDri:
2597  case ARM::ORRrr:
2598  case ARM::ORRri:
2599  case ARM::t2ORRrr:
2600  case ARM::t2ORRri:
2601  case ARM::EORrr:
2602  case ARM::EORri:
2603  case ARM::t2EORrr:
2604  case ARM::t2EORri:
2605  case ARM::t2LSRri:
2606  case ARM::t2LSRrr:
2607  case ARM::t2LSLri:
2608  case ARM::t2LSLrr:
2609  return true;
2610  }
2611 }
2612 
2613 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2614 /// comparison into one that sets the zero bit in the flags register;
2615 /// Remove a redundant Compare instruction if an earlier instruction can set the
2616 /// flags in the same way as Compare.
2617 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2618 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2619 /// condition code of instructions which use the flags.
2620 bool ARMBaseInstrInfo::optimizeCompareInstr(
2621  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
2622  int CmpValue, const MachineRegisterInfo *MRI) const {
2623  // Get the unique definition of SrcReg.
2624  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2625  if (!MI) return false;
2626 
2627  // Masked compares sometimes use the same register as the corresponding 'and'.
2628  if (CmpMask != ~0) {
2629  if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2630  MI = nullptr;
2631  for (MachineRegisterInfo::use_instr_iterator
2632  UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2633  UI != UE; ++UI) {
2634  if (UI->getParent() != CmpInstr.getParent())
2635  continue;
2636  MachineInstr *PotentialAND = &*UI;
2637  if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2638  isPredicated(*PotentialAND))
2639  continue;
2640  MI = PotentialAND;
2641  break;
2642  }
2643  if (!MI) return false;
2644  }
2645  }
2646 
2647  // Get ready to iterate backward from CmpInstr.
2648  MachineBasicBlock::iterator I = CmpInstr, E = MI,
2649  B = CmpInstr.getParent()->begin();
2650 
2651  // Early exit if CmpInstr is at the beginning of the BB.
2652  if (I == B) return false;
2653 
2654  // There are two possible candidates which can be changed to set CPSR:
2655  // One is MI, the other is a SUB instruction.
2656  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2657  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2658  MachineInstr *Sub = nullptr;
2659  if (SrcReg2 != 0)
2660  // MI is not a candidate for CMPrr.
2661  MI = nullptr;
2662  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2663  // Conservatively refuse to convert an instruction which isn't in the same
2664  // BB as the comparison.
2665  // For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
2666  // Thus we cannot return here.
2667  if (CmpInstr.getOpcode() == ARM::CMPri ||
2668  CmpInstr.getOpcode() == ARM::t2CMPri)
2669  MI = nullptr;
2670  else
2671  return false;
2672  }
2673 
2674  bool IsThumb1 = false;
2675  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2676  return false;
2677 
2678  // We also want to do this peephole for cases like this: if (a*b == 0),
2679  // and optimise away the CMP instruction from the generated code sequence:
2680  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2681  // resulting from the select instruction, but these MOVS instructions for
2682  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2683  // However, if we only have MOVS instructions in between the CMP and the
2684  // other instruction (the MULS in this example), then the CPSR is dead so we
2685  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2686  // reordering and then continue the analysis hoping we can eliminate the
2687  // CMP. This peephole works on the vregs, so is still in SSA form. As a
2688  // consequence, the movs won't redefine/kill the MUL operands which would
2689  // make this reordering illegal.
2690  if (MI && IsThumb1) {
2691  --I;
2692  bool CanReorder = true;
2693  const bool HasStmts = I != E;
2694  for (; I != E; --I) {
2695  if (I->getOpcode() != ARM::tMOVi8) {
2696  CanReorder = false;
2697  break;
2698  }
2699  }
2700  if (HasStmts && CanReorder) {
2701  MI = MI->removeFromParent();
2702  E = CmpInstr;
2703  CmpInstr.getParent()->insert(E, MI);
2704  }
2705  I = CmpInstr;
2706  E = MI;
2707  }
2708 
2709  // Check that CPSR isn't set between the comparison instruction and the one we
2710  // want to change. At the same time, search for Sub.
2711  const TargetRegisterInfo *TRI = &getRegisterInfo();
2712  --I;
2713  for (; I != E; --I) {
2714  const MachineInstr &Instr = *I;
2715 
2716  if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2717  Instr.readsRegister(ARM::CPSR, TRI))
2718  // This instruction modifies or uses CPSR after the one we want to
2719  // change. We can't do this transformation.
2720  return false;
2721 
2722  // Check whether CmpInstr can be made redundant by the current instruction.
2723  if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
2724  Sub = &*I;
2725  break;
2726  }
2727 
2728  if (I == B)
2729  // The 'and' is below the comparison instruction.
2730  return false;
2731  }
2732 
2733  // Return false if no candidates exist.
2734  if (!MI && !Sub)
2735  return false;
2736 
2737  // The single candidate is called MI.
2738  if (!MI) MI = Sub;
2739 
2740  // We can't use a predicated instruction - it doesn't always write the flags.
2741  if (isPredicated(*MI))
2742  return false;
2743 
2744  // Scan forward for the use of CPSR
2745  // When checking against MI: if it's a conditional code that requires
2746  // checking of the V bit or C bit, then this is not safe to do.
2747  // It is safe to remove CmpInstr if CPSR is redefined or killed.
2748  // If we are done with the basic block, we need to check whether CPSR is
2749  // live-out.
2750  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
2751  OperandsToUpdate;
2752  bool isSafe = false;
2753  I = CmpInstr;
2754  E = CmpInstr.getParent()->end();
2755  while (!isSafe && ++I != E) {
2756  const MachineInstr &Instr = *I;
2757  for (unsigned IO = 0, EO = Instr.getNumOperands();
2758  !isSafe && IO != EO; ++IO) {
2759  const MachineOperand &MO = Instr.getOperand(IO);
2760  if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2761  isSafe = true;
2762  break;
2763  }
2764  if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2765  continue;
2766  if (MO.isDef()) {
2767  isSafe = true;
2768  break;
2769  }
2770  // Condition code is after the operand before CPSR except for VSELs.
2771  ARMCC::CondCodes CC;
2772  bool IsInstrVSel = true;
2773  switch (Instr.getOpcode()) {
2774  default:
2775  IsInstrVSel = false;
2776  CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
2777  break;
2778  case ARM::VSELEQD:
2779  case ARM::VSELEQS:
2780  CC = ARMCC::EQ;
2781  break;
2782  case ARM::VSELGTD:
2783  case ARM::VSELGTS:
2784  CC = ARMCC::GT;
2785  break;
2786  case ARM::VSELGED:
2787  case ARM::VSELGES:
2788  CC = ARMCC::GE;
2789  break;
2790  case ARM::VSELVSS:
2791  case ARM::VSELVSD:
2792  CC = ARMCC::VS;
2793  break;
2794  }
2795 
2796  if (Sub) {
2797  ARMCC::CondCodes NewCC = getSwappedCondition(CC);
2798  if (NewCC == ARMCC::AL)
2799  return false;
2800  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2801  // on CMP needs to be updated to be based on SUB.
2802  // Push the condition code operands to OperandsToUpdate.
2803  // If it is safe to remove CmpInstr, the condition code of these
2804  // operands will be modified.
2805  if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2806  Sub->getOperand(2).getReg() == SrcReg) {
2807  // VSel doesn't support condition code update.
2808  if (IsInstrVSel)
2809  return false;
2810  OperandsToUpdate.push_back(
2811  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
2812  }
2813  } else {
2814  // No Sub, so this is x = <op> y, z; cmp x, 0.
2815  switch (CC) {
2816  case ARMCC::EQ: // Z
2817  case ARMCC::NE: // Z
2818  case ARMCC::MI: // N
2819  case ARMCC::PL: // N
2820  case ARMCC::AL: // none
2821  // CPSR can be used multiple times, we should continue.
2822  break;
2823  case ARMCC::HS: // C
2824  case ARMCC::LO: // C
2825  case ARMCC::VS: // V
2826  case ARMCC::VC: // V
2827  case ARMCC::HI: // C Z
2828  case ARMCC::LS: // C Z
2829  case ARMCC::GE: // N V
2830  case ARMCC::LT: // N V
2831  case ARMCC::GT: // Z N V
2832  case ARMCC::LE: // Z N V
2833  // The instruction uses the V bit or C bit which is not safe.
2834  return false;
2835  }
2836  }
2837  }
2838  }
2839 
2840  // If CPSR is not killed nor re-defined, we should check whether it is
2841  // live-out. If it is live-out, do not optimize.
2842  if (!isSafe) {
2843  MachineBasicBlock *MBB = CmpInstr.getParent();
2844  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2845  SE = MBB->succ_end(); SI != SE; ++SI)
2846  if ((*SI)->isLiveIn(ARM::CPSR))
2847  return false;
2848  }
2849 
2850  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
2851  // set CPSR so this is represented as an explicit output)
2852  if (!IsThumb1) {
2853  MI->getOperand(5).setReg(ARM::CPSR);
2854  MI->getOperand(5).setIsDef(true);
2855  }
2856  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
2857  CmpInstr.eraseFromParent();
2858 
2859  // Modify the condition code of operands in OperandsToUpdate.
2860  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2861  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2862  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
2863  OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
2864 
2865  return true;
2866 }
2867 
2868 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2869  unsigned Reg,
2870  MachineRegisterInfo *MRI) const {
2871  // Fold large immediates into add, sub, or, xor.
2872  unsigned DefOpc = DefMI.getOpcode();
2873  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
2874  return false;
2875  if (!DefMI.getOperand(1).isImm())
2876  // Could be t2MOVi32imm <ga:xx>
2877  return false;
2878 
2879  if (!MRI->hasOneNonDBGUse(Reg))
2880  return false;
2881 
2882  const MCInstrDesc &DefMCID = DefMI.getDesc();
2883  if (DefMCID.hasOptionalDef()) {
2884  unsigned NumOps = DefMCID.getNumOperands();
2885  const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
2886  if (MO.getReg() == ARM::CPSR && !MO.isDead())
2887  // If DefMI defines CPSR and it is not dead, it's obviously not safe
2888  // to delete DefMI.
2889  return false;
2890  }
2891 
2892  const MCInstrDesc &UseMCID = UseMI.getDesc();
2893  if (UseMCID.hasOptionalDef()) {
2894  unsigned NumOps = UseMCID.getNumOperands();
2895  if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
2896  // If the instruction sets the flag, do not attempt this optimization
2897  // since it may change the semantics of the code.
2898  return false;
2899  }
2900 
2901  unsigned UseOpc = UseMI.getOpcode();
2902  unsigned NewUseOpc = 0;
2903  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
2904  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
2905  bool Commute = false;
2906  switch (UseOpc) {
2907  default: return false;
2908  case ARM::SUBrr:
2909  case ARM::ADDrr:
2910  case ARM::ORRrr:
2911  case ARM::EORrr:
2912  case ARM::t2SUBrr:
2913  case ARM::t2ADDrr:
2914  case ARM::t2ORRrr:
2915  case ARM::t2EORrr: {
2916  Commute = UseMI.getOperand(2).getReg() != Reg;
2917  switch (UseOpc) {
2918  default: break;
2919  case ARM::ADDrr:
2920  case ARM::SUBrr:
2921  if (UseOpc == ARM::SUBrr && Commute)
2922  return false;
2923 
2924  // ADD/SUB are special because they're essentially the same operation, so
2925  // we can handle a larger range of immediates.
2926  if (ARM_AM::isSOImmTwoPartVal(ImmVal))
2927  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
2928  else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
2929  ImmVal = -ImmVal;
2930  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
2931  } else
2932  return false;
2933  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
2934  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
2935  break;
2936  case ARM::ORRrr:
2937  case ARM::EORrr:
2938  if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
2939  return false;
2940  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
2941  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
2942  switch (UseOpc) {
2943  default: break;
2944  case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
2945  case ARM::EORrr: NewUseOpc = ARM::EORri; break;
2946  }
2947  break;
2948  case ARM::t2ADDrr:
2949  case ARM::t2SUBrr:
2950  if (UseOpc == ARM::t2SUBrr && Commute)
2951  return false;
2952 
2953  // ADD/SUB are special because they're essentially the same operation, so
2954  // we can handle a larger range of immediates.
2955  if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
2956  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
2957  else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
2958  ImmVal = -ImmVal;
2959  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
2960  } else
2961  return false;
2962  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
2963  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
2964  break;
2965  case ARM::t2ORRrr:
2966  case ARM::t2EORrr:
2967  if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
2968  return false;
2969  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
2970  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
2971  switch (UseOpc) {
2972  default: break;
2973  case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
2974  case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
2975  }
2976  break;
2977  }
2978  }
2979  }
2980 
2981  unsigned OpIdx = Commute ? 2 : 1;
2982  unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
2983  bool isKill = UseMI.getOperand(OpIdx).isKill();
2984  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
2985  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
2986  NewReg)
2987  .addReg(Reg1, getKillRegState(isKill))
2988  .addImm(SOImmValV1)
2989  .add(predOps(ARMCC::AL))
2990  .add(condCodeOp());
2991  UseMI.setDesc(get(NewUseOpc));
2992  UseMI.getOperand(1).setReg(NewReg);
2993  UseMI.getOperand(1).setIsKill();
2994  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
2995  DefMI.eraseFromParent();
2996  return true;
2997 }
2998 
2999 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3000  const MachineInstr &MI) {
3001  switch (MI.getOpcode()) {
3002  default: {
3003  const MCInstrDesc &Desc = MI.getDesc();
3004  int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3005  assert(UOps >= 0 && "bad # UOps");
3006  return UOps;
3007  }
3008 
3009  case ARM::LDRrs:
3010  case ARM::LDRBrs:
3011  case ARM::STRrs:
3012  case ARM::STRBrs: {
3013  unsigned ShOpVal = MI.getOperand(3).getImm();
3014  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3015  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3016  if (!isSub &&
3017  (ShImm == 0 ||
3018  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3019  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3020  return 1;
3021  return 2;
3022  }
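  // e.g. "ldr r0, [r1, r2, lsl #2]" fits the fast addressing form above and
  // costs a single uop, while a subtracted index such as "ldr r0, [r1, -r2]"
  // takes the 2-uop form.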
3023 
3024  case ARM::LDRH:
3025  case ARM::STRH: {
3026  if (!MI.getOperand(2).getReg())
3027  return 1;
3028 
3029  unsigned ShOpVal = MI.getOperand(3).getImm();
3030  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3031  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3032  if (!isSub &&
3033  (ShImm == 0 ||
3034  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3035  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3036  return 1;
3037  return 2;
3038  }
3039 
3040  case ARM::LDRSB:
3041  case ARM::LDRSH:
3042  return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3043 
3044  case ARM::LDRSB_POST:
3045  case ARM::LDRSH_POST: {
3046  unsigned Rt = MI.getOperand(0).getReg();
3047  unsigned Rm = MI.getOperand(3).getReg();
3048  return (Rt == Rm) ? 4 : 3;
3049  }
3050 
3051  case ARM::LDR_PRE_REG:
3052  case ARM::LDRB_PRE_REG: {
3053  unsigned Rt = MI.getOperand(0).getReg();
3054  unsigned Rm = MI.getOperand(3).getReg();
3055  if (Rt == Rm)
3056  return 3;
3057  unsigned ShOpVal = MI.getOperand(4).getImm();
3058  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3059  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3060  if (!isSub &&
3061  (ShImm == 0 ||
3062  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3063  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3064  return 2;
3065  return 3;
3066  }
3067 
3068  case ARM::STR_PRE_REG:
3069  case ARM::STRB_PRE_REG: {
3070  unsigned ShOpVal = MI.getOperand(4).getImm();
3071  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3072  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3073  if (!isSub &&
3074  (ShImm == 0 ||
3075  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3076  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3077  return 2;
3078  return 3;
3079  }
3080 
3081  case ARM::LDRH_PRE:
3082  case ARM::STRH_PRE: {
3083  unsigned Rt = MI.getOperand(0).getReg();
3084  unsigned Rm = MI.getOperand(3).getReg();
3085  if (!Rm)
3086  return 2;
3087  if (Rt == Rm)
3088  return 3;
3089  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3090  }
3091 
3092  case ARM::LDR_POST_REG:
3093  case ARM::LDRB_POST_REG:
3094  case ARM::LDRH_POST: {
3095  unsigned Rt = MI.getOperand(0).getReg();
3096  unsigned Rm = MI.getOperand(3).getReg();
3097  return (Rt == Rm) ? 3 : 2;
3098  }
3099 
3100  case ARM::LDR_PRE_IMM:
3101  case ARM::LDRB_PRE_IMM:
3102  case ARM::LDR_POST_IMM:
3103  case ARM::LDRB_POST_IMM:
3104  case ARM::STRB_POST_IMM:
3105  case ARM::STRB_POST_REG:
3106  case ARM::STRB_PRE_IMM:
3107  case ARM::STRH_POST:
3108  case ARM::STR_POST_IMM:
3109  case ARM::STR_POST_REG:
3110  case ARM::STR_PRE_IMM:
3111  return 2;
3112 
3113  case ARM::LDRSB_PRE:
3114  case ARM::LDRSH_PRE: {
3115  unsigned Rm = MI.getOperand(3).getReg();
3116  if (Rm == 0)
3117  return 3;
3118  unsigned Rt = MI.getOperand(0).getReg();
3119  if (Rt == Rm)
3120  return 4;
3121  unsigned ShOpVal = MI.getOperand(4).getImm();
3122  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3123  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3124  if (!isSub &&
3125  (ShImm == 0 ||
3126  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3127  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3128  return 3;
3129  return 4;
3130  }
3131 
3132  case ARM::LDRD: {
3133  unsigned Rt = MI.getOperand(0).getReg();
3134  unsigned Rn = MI.getOperand(2).getReg();
3135  unsigned Rm = MI.getOperand(3).getReg();
3136  if (Rm)
3137  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3138  : 3;
3139  return (Rt == Rn) ? 3 : 2;
3140  }
3141 
3142  case ARM::STRD: {
3143  unsigned Rm = MI.getOperand(3).getReg();
3144  if (Rm)
3145  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3146  : 3;
3147  return 2;
3148  }
3149 
3150  case ARM::LDRD_POST:
3151  case ARM::t2LDRD_POST:
3152  return 3;
3153 
3154  case ARM::STRD_POST:
3155  case ARM::t2STRD_POST:
3156  return 4;
3157 
3158  case ARM::LDRD_PRE: {
3159  unsigned Rt = MI.getOperand(0).getReg();
3160  unsigned Rn = MI.getOperand(3).getReg();
3161  unsigned Rm = MI.getOperand(4).getReg();
3162  if (Rm)
3163  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3164  : 4;
3165  return (Rt == Rn) ? 4 : 3;
3166  }
3167 
3168  case ARM::t2LDRD_PRE: {
3169  unsigned Rt = MI.getOperand(0).getReg();
3170  unsigned Rn = MI.getOperand(3).getReg();
3171  return (Rt == Rn) ? 4 : 3;
3172  }
3173 
3174  case ARM::STRD_PRE: {
3175  unsigned Rm = MI.getOperand(4).getReg();
3176  if (Rm)
3177  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3178  : 4;
3179  return 3;
3180  }
3181 
3182  case ARM::t2STRD_PRE:
3183  return 3;
3184 
3185  case ARM::t2LDR_POST:
3186  case ARM::t2LDRB_POST:
3187  case ARM::t2LDRB_PRE:
3188  case ARM::t2LDRSBi12:
3189  case ARM::t2LDRSBi8:
3190  case ARM::t2LDRSBpci:
3191  case ARM::t2LDRSBs:
3192  case ARM::t2LDRH_POST:
3193  case ARM::t2LDRH_PRE:
3194  case ARM::t2LDRSBT:
3195  case ARM::t2LDRSB_POST:
3196  case ARM::t2LDRSB_PRE:
3197  case ARM::t2LDRSH_POST:
3198  case ARM::t2LDRSH_PRE:
3199  case ARM::t2LDRSHi12:
3200  case ARM::t2LDRSHi8:
3201  case ARM::t2LDRSHpci:
3202  case ARM::t2LDRSHs:
3203  return 2;
3204 
3205  case ARM::t2LDRDi8: {
3206  unsigned Rt = MI.getOperand(0).getReg();
3207  unsigned Rn = MI.getOperand(2).getReg();
3208  return (Rt == Rn) ? 3 : 2;
3209  }
3210 
3211  case ARM::t2STRB_POST:
3212  case ARM::t2STRB_PRE:
3213  case ARM::t2STRBs:
3214  case ARM::t2STRDi8:
3215  case ARM::t2STRH_POST:
3216  case ARM::t2STRH_PRE:
3217  case ARM::t2STRHs:
3218  case ARM::t2STR_POST:
3219  case ARM::t2STR_PRE:
3220  case ARM::t2STRs:
3221  return 2;
3222  }
3223 }
3224 
3225 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3226 // can't be easily determined, return 0 (missing MachineMemOperand).
3227 //
3228 // FIXME: The current MachineInstr design does not support relying on machine
3229 // mem operands to determine the width of a memory access. Instead, we expect
3230 // the target to provide this information based on the instruction opcode and
3231 // operands. However, using MachineMemOperand is the best solution now for
3232 // two reasons:
3233 //
3234 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3235 // operands. This is much more dangerous than using the MachineMemOperand
3236 // sizes because CodeGen passes can insert/remove optional machine operands. In
3237 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3238 // postRA passes as well.
3239 //
3240 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3241 // machine model that calls this should handle the unknown (zero size) case.
3242 //
3243 // Long term, we should require a target hook that verifies MachineMemOperand
3244 // sizes during MC lowering. That target hook should be local to MC lowering
3245 // because we can't ensure that it is aware of other MI forms. Doing this will
3246 // ensure that MachineMemOperands are correctly propagated through all passes.
3247 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3248  unsigned Size = 0;
3249  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3250  E = MI.memoperands_end();
3251  I != E; ++I) {
3252  Size += (*I)->getSize();
3253  }
3254  return Size / 4;
3255 }
3256 
3257 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3258  unsigned NumRegs) {
3259  unsigned UOps = 1 + NumRegs; // 1 for address computation.
3260  switch (Opc) {
3261  default:
3262  break;
3263  case ARM::VLDMDIA_UPD:
3264  case ARM::VLDMDDB_UPD:
3265  case ARM::VLDMSIA_UPD:
3266  case ARM::VLDMSDB_UPD:
3267  case ARM::VSTMDIA_UPD:
3268  case ARM::VSTMDDB_UPD:
3269  case ARM::VSTMSIA_UPD:
3270  case ARM::VSTMSDB_UPD:
3271  case ARM::LDMIA_UPD:
3272  case ARM::LDMDA_UPD:
3273  case ARM::LDMDB_UPD:
3274  case ARM::LDMIB_UPD:
3275  case ARM::STMIA_UPD:
3276  case ARM::STMDA_UPD:
3277  case ARM::STMDB_UPD:
3278  case ARM::STMIB_UPD:
3279  case ARM::tLDMIA_UPD:
3280  case ARM::tSTMIA_UPD:
3281  case ARM::t2LDMIA_UPD:
3282  case ARM::t2LDMDB_UPD:
3283  case ARM::t2STMIA_UPD:
3284  case ARM::t2STMDB_UPD:
3285  ++UOps; // One for base register writeback.
3286  break;
3287  case ARM::LDMIA_RET:
3288  case ARM::tPOP_RET:
3289  case ARM::t2LDMIA_RET:
3290  UOps += 2; // One for base reg wb, one for write to pc.
3291  break;
3292  }
3293  return UOps;
3294 }
3295 
3296 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3297  const MachineInstr &MI) const {
3298  if (!ItinData || ItinData->isEmpty())
3299  return 1;
3300 
3301  const MCInstrDesc &Desc = MI.getDesc();
3302  unsigned Class = Desc.getSchedClass();
3303  int ItinUOps = ItinData->getNumMicroOps(Class);
3304  if (ItinUOps >= 0) {
3305  if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3306  return getNumMicroOpsSwiftLdSt(ItinData, MI);
3307 
3308  return ItinUOps;
3309  }
3310 
3311  unsigned Opc = MI.getOpcode();
3312  switch (Opc) {
3313  default:
3314  llvm_unreachable("Unexpected multi-uops instruction!");
3315  case ARM::VLDMQIA:
3316  case ARM::VSTMQIA:
3317  return 2;
3318 
3319  // The number of uOps for load / store multiple is determined by the number
3320  // of registers.
3321  //
3322  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3323  // same cycle. The scheduling for the first load / store must be done
3324  // separately by assuming the address is not 64-bit aligned.
3325  //
3326  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3327  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3328  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3329  case ARM::VLDMDIA:
3330  case ARM::VLDMDIA_UPD:
3331  case ARM::VLDMDDB_UPD:
3332  case ARM::VLDMSIA:
3333  case ARM::VLDMSIA_UPD:
3334  case ARM::VLDMSDB_UPD:
3335  case ARM::VSTMDIA:
3336  case ARM::VSTMDIA_UPD:
3337  case ARM::VSTMDDB_UPD:
3338  case ARM::VSTMSIA:
3339  case ARM::VSTMSIA_UPD:
3340  case ARM::VSTMSDB_UPD: {
3341  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3342  return (NumRegs / 2) + (NumRegs % 2) + 1;
3343  }
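  // e.g. a VLDMDIA of 5 D-registers is modelled as 5/2 + 5%2 + 1 = 4 uops by
  // the Cortex-A9 style formula above.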
3344 
3345  case ARM::LDMIA_RET:
3346  case ARM::LDMIA:
3347  case ARM::LDMDA:
3348  case ARM::LDMDB:
3349  case ARM::LDMIB:
3350  case ARM::LDMIA_UPD:
3351  case ARM::LDMDA_UPD:
3352  case ARM::LDMDB_UPD:
3353  case ARM::LDMIB_UPD:
3354  case ARM::STMIA:
3355  case ARM::STMDA:
3356  case ARM::STMDB:
3357  case ARM::STMIB:
3358  case ARM::STMIA_UPD:
3359  case ARM::STMDA_UPD:
3360  case ARM::STMDB_UPD:
3361  case ARM::STMIB_UPD:
3362  case ARM::tLDMIA:
3363  case ARM::tLDMIA_UPD:
3364  case ARM::tSTMIA_UPD:
3365  case ARM::tPOP_RET:
3366  case ARM::tPOP:
3367  case ARM::tPUSH:
3368  case ARM::t2LDMIA_RET:
3369  case ARM::t2LDMIA:
3370  case ARM::t2LDMDB:
3371  case ARM::t2LDMIA_UPD:
3372  case ARM::t2LDMDB_UPD:
3373  case ARM::t2STMIA:
3374  case ARM::t2STMDB:
3375  case ARM::t2STMIA_UPD:
3376  case ARM::t2STMDB_UPD: {
3377  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3378  switch (Subtarget.getLdStMultipleTiming()) {
3379  case ARMSubtarget::SingleIssuePlusExtras:
3380  return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3381  case ARMSubtarget::SingleIssue:
3382  // Assume the worst.
3383  return NumRegs;
3384  case ARMSubtarget::DoubleIssue: {
3385  if (NumRegs < 4)
3386  return 2;
3387  // 4 registers would be issued: 2, 2.
3388  // 5 registers would be issued: 2, 2, 1.
3389  unsigned UOps = (NumRegs / 2);
3390  if (NumRegs % 2)
3391  ++UOps;
3392  return UOps;
3393  }
3394  case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3395  unsigned UOps = (NumRegs / 2);
3396  // If there is an odd number of registers or the access is not 64-bit aligned,
3397  // then it takes an extra AGU (Address Generation Unit) cycle.
3398  if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3399  (*MI.memoperands_begin())->getAlignment() < 8)
3400  ++UOps;
3401  return UOps;
3402  }
3403  }
3404  }
3405  }
3406  llvm_unreachable("Didn't find the number of microops");
3407 }
3408 
3409 int
3410 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3411  const MCInstrDesc &DefMCID,
3412  unsigned DefClass,
3413  unsigned DefIdx, unsigned DefAlign) const {
3414  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3415  if (RegNo <= 0)
3416  // Def is the address writeback.
3417  return ItinData->getOperandCycle(DefClass, DefIdx);
3418 
3419  int DefCycle;
3420  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3421  // (regno / 2) + (regno % 2) + 1
3422  DefCycle = RegNo / 2 + 1;
3423  if (RegNo % 2)
3424  ++DefCycle;
3425  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3426  DefCycle = RegNo;
3427  bool isSLoad = false;
3428 
3429  switch (DefMCID.getOpcode()) {
3430  default: break;
3431  case ARM::VLDMSIA:
3432  case ARM::VLDMSIA_UPD:
3433  case ARM::VLDMSDB_UPD:
3434  isSLoad = true;
3435  break;
3436  }
3437 
3438  // If there is an odd number of 'S' registers or the access is not 64-bit aligned,
3439  // then it takes an extra cycle.
3440  if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3441  ++DefCycle;
3442  } else {
3443  // Assume the worst.
3444  DefCycle = RegNo + 2;
3445  }
3446 
3447  return DefCycle;
3448 }
3449 
3450 static bool isLDMBaseRegInList(const MachineInstr &MI) {
3451  unsigned BaseReg = MI.getOperand(0).getReg();
3452  for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
3453  const auto &Op = MI.getOperand(i);
3454  if (Op.isReg() && Op.getReg() == BaseReg)
3455  return true;
3456  }
3457  return false;
3458 }
3459 unsigned
3461  // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops
3462  // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops)
3463  return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
3464 }
3465 
3466 int
3467 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3468  const MCInstrDesc &DefMCID,
3469  unsigned DefClass,
3470  unsigned DefIdx, unsigned DefAlign) const {
3471  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3472  if (RegNo <= 0)
3473  // Def is the address writeback.
3474  return ItinData->getOperandCycle(DefClass, DefIdx);
3475 
3476  int DefCycle;
3477  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3478  // 4 registers would be issued: 1, 2, 1.
3479  // 5 registers would be issued: 1, 2, 2.
3480  DefCycle = RegNo / 2;
3481  if (DefCycle < 1)
3482  DefCycle = 1;
3483  // Result latency is issue cycle + 2: E2.
3484  DefCycle += 2;
3485  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3486  DefCycle = (RegNo / 2);
3487  // If there is an odd number of registers or if it's not 64-bit aligned,
3488  // then it takes an extra AGU (Address Generation Unit) cycle.
3489  if ((RegNo % 2) || DefAlign < 8)
3490  ++DefCycle;
3491  // Result latency is AGU cycles + 2.
3492  DefCycle += 2;
3493  } else {
3494  // Assume the worst.
3495  DefCycle = RegNo + 2;
3496  }
3497 
3498  return DefCycle;
3499 }
3500 
3501 int
3502 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3503  const MCInstrDesc &UseMCID,
3504  unsigned UseClass,
3505  unsigned UseIdx, unsigned UseAlign) const {
3506  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3507  if (RegNo <= 0)
3508  return ItinData->getOperandCycle(UseClass, UseIdx);
3509 
3510  int UseCycle;
3511  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3512  // (regno / 2) + (regno % 2) + 1
3513  UseCycle = RegNo / 2 + 1;
3514  if (RegNo % 2)
3515  ++UseCycle;
3516  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3517  UseCycle = RegNo;
3518  bool isSStore = false;
3519 
3520  switch (UseMCID.getOpcode()) {
3521  default: break;
3522  case ARM::VSTMSIA:
3523  case ARM::VSTMSIA_UPD:
3524  case ARM::VSTMSDB_UPD:
3525  isSStore = true;
3526  break;
3527  }
3528 
3529  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3530  // then it takes an extra cycle.
3531  if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3532  ++UseCycle;
3533  } else {
3534  // Assume the worst.
3535  UseCycle = RegNo + 2;
3536  }
3537 
3538  return UseCycle;
3539 }
3540 
3541 int
3542 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3543  const MCInstrDesc &UseMCID,
3544  unsigned UseClass,
3545  unsigned UseIdx, unsigned UseAlign) const {
3546  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3547  if (RegNo <= 0)
3548  return ItinData->getOperandCycle(UseClass, UseIdx);
3549 
3550  int UseCycle;
3551  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3552  UseCycle = RegNo / 2;
3553  if (UseCycle < 2)
3554  UseCycle = 2;
3555  // Read in E3.
3556  UseCycle += 2;
3557  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3558  UseCycle = (RegNo / 2);
3559  // If there is an odd number of registers or if it's not 64-bit aligned,
3560  // then it takes an extra AGU (Address Generation Unit) cycle.
3561  if ((RegNo % 2) || UseAlign < 8)
3562  ++UseCycle;
3563  } else {
3564  // Assume the worst.
3565  UseCycle = 1;
3566  }
3567  return UseCycle;
3568 }
3569 
3570 int
3571 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3572  const MCInstrDesc &DefMCID,
3573  unsigned DefIdx, unsigned DefAlign,
3574  const MCInstrDesc &UseMCID,
3575  unsigned UseIdx, unsigned UseAlign) const {
3576  unsigned DefClass = DefMCID.getSchedClass();
3577  unsigned UseClass = UseMCID.getSchedClass();
3578 
3579  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3580  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3581 
3582  // This may be a def / use of a variable_ops instruction; the operand
3583  // latency might be determinable dynamically. Let the target try to
3584  // figure it out.
3585  int DefCycle = -1;
3586  bool LdmBypass = false;
3587  switch (DefMCID.getOpcode()) {
3588  default:
3589  DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3590  break;
3591 
3592  case ARM::VLDMDIA:
3593  case ARM::VLDMDIA_UPD:
3594  case ARM::VLDMDDB_UPD:
3595  case ARM::VLDMSIA:
3596  case ARM::VLDMSIA_UPD:
3597  case ARM::VLDMSDB_UPD:
3598  DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3599  break;
3600 
3601  case ARM::LDMIA_RET:
3602  case ARM::LDMIA:
3603  case ARM::LDMDA:
3604  case ARM::LDMDB:
3605  case ARM::LDMIB:
3606  case ARM::LDMIA_UPD:
3607  case ARM::LDMDA_UPD:
3608  case ARM::LDMDB_UPD:
3609  case ARM::LDMIB_UPD:
3610  case ARM::tLDMIA:
3611  case ARM::tLDMIA_UPD:
3612  case ARM::tPUSH:
3613  case ARM::t2LDMIA_RET:
3614  case ARM::t2LDMIA:
3615  case ARM::t2LDMDB:
3616  case ARM::t2LDMIA_UPD:
3617  case ARM::t2LDMDB_UPD:
3618  LdmBypass = true;
3619  DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3620  break;
3621  }
3622 
3623  if (DefCycle == -1)
3624  // We can't seem to determine the result latency of the def, assume it's 2.
3625  DefCycle = 2;
3626 
3627  int UseCycle = -1;
3628  switch (UseMCID.getOpcode()) {
3629  default:
3630  UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3631  break;
3632 
3633  case ARM::VSTMDIA:
3634  case ARM::VSTMDIA_UPD:
3635  case ARM::VSTMDDB_UPD:
3636  case ARM::VSTMSIA:
3637  case ARM::VSTMSIA_UPD:
3638  case ARM::VSTMSDB_UPD:
3639  UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3640  break;
3641 
3642  case ARM::STMIA:
3643  case ARM::STMDA:
3644  case ARM::STMDB:
3645  case ARM::STMIB:
3646  case ARM::STMIA_UPD:
3647  case ARM::STMDA_UPD:
3648  case ARM::STMDB_UPD:
3649  case ARM::STMIB_UPD:
3650  case ARM::tSTMIA_UPD:
3651  case ARM::tPOP_RET:
3652  case ARM::tPOP:
3653  case ARM::t2STMIA:
3654  case ARM::t2STMDB:
3655  case ARM::t2STMIA_UPD:
3656  case ARM::t2STMDB_UPD:
3657  UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3658  break;
3659  }
3660 
3661  if (UseCycle == -1)
3662  // Assume it's read in the first stage.
3663  UseCycle = 1;
3664 
3665  UseCycle = DefCycle - UseCycle + 1;
3666  if (UseCycle > 0) {
3667  if (LdmBypass) {
3668  // It's a variable_ops instruction so we can't use DefIdx here. Just use
3669  // first def operand.
3670  if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3671  UseClass, UseIdx))
3672  --UseCycle;
3673  } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3674  UseClass, UseIdx)) {
3675  --UseCycle;
3676  }
3677  }
3678 
3679  return UseCycle;
3680 }
3681 
3682 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3683  const MachineInstr *MI, unsigned Reg,
3684  unsigned &DefIdx, unsigned &Dist) {
3685  Dist = 0;
3686 
3689  assert(II->isInsideBundle() && "Empty bundle?");
3690 
3691  int Idx = -1;
3692  while (II->isInsideBundle()) {
3693  Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3694  if (Idx != -1)
3695  break;
3696  --II;
3697  ++Dist;
3698  }
3699 
3700  assert(Idx != -1 && "Cannot find bundled definition!");
3701  DefIdx = Idx;
3702  return &*II;
3703 }
3704 
3705 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3706  const MachineInstr &MI, unsigned Reg,
3707  unsigned &UseIdx, unsigned &Dist) {
3708  Dist = 0;
3709 
3711  assert(II->isInsideBundle() && "Empty bundle?");
3713 
3714  // FIXME: This doesn't properly handle multiple uses.
3715  int Idx = -1;
3716  while (II != E && II->isInsideBundle()) {
3717  Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3718  if (Idx != -1)
3719  break;
3720  if (II->getOpcode() != ARM::t2IT)
3721  ++Dist;
3722  ++II;
3723  }
3724 
3725  if (Idx == -1) {
3726  Dist = 0;
3727  return nullptr;
3728  }
3729 
3730  UseIdx = Idx;
3731  return &*II;
3732 }
3733 
3734 /// Return the number of cycles to add to (or subtract from) the static
3735 /// itinerary based on the def opcode and alignment. The caller will ensure that
3736 /// adjusted latency is at least one cycle.
3737 static int adjustDefLatency(const ARMSubtarget &Subtarget,
3738  const MachineInstr &DefMI,
3739  const MCInstrDesc &DefMCID, unsigned DefAlign) {
3740  int Adjust = 0;
3741  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
3742  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3743  // variants are one cycle cheaper.
3744  switch (DefMCID.getOpcode()) {
3745  default: break;
3746  case ARM::LDRrs:
3747  case ARM::LDRBrs: {
3748  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3749  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3750  if (ShImm == 0 ||
3751  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3752  --Adjust;
3753  break;
3754  }
3755  case ARM::t2LDRs:
3756  case ARM::t2LDRBs:
3757  case ARM::t2LDRHs:
3758  case ARM::t2LDRSHs: {
3759  // Thumb2 mode: lsl only.
3760  unsigned ShAmt = DefMI.getOperand(3).getImm();
3761  if (ShAmt == 0 || ShAmt == 2)
3762  --Adjust;
3763  break;
3764  }
3765  }
3766  } else if (Subtarget.isSwift()) {
3767  // FIXME: Properly handle all of the latency adjustments for address
3768  // writeback.
3769  switch (DefMCID.getOpcode()) {
3770  default: break;
3771  case ARM::LDRrs:
3772  case ARM::LDRBrs: {
3773  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3774  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3775  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3776  if (!isSub &&
3777  (ShImm == 0 ||
3778  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3779  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3780  Adjust -= 2;
3781  else if (!isSub &&
3782  ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3783  --Adjust;
3784  break;
3785  }
3786  case ARM::t2LDRs:
3787  case ARM::t2LDRBs:
3788  case ARM::t2LDRHs:
3789  case ARM::t2LDRSHs: {
3790  // Thumb2 mode: lsl only.
3791  unsigned ShAmt = DefMI.getOperand(3).getImm();
3792  if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
3793  Adjust -= 2;
3794  break;
3795  }
3796  }
3797  }
3798 
3799  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
3800  switch (DefMCID.getOpcode()) {
3801  default: break;
3802  case ARM::VLD1q8:
3803  case ARM::VLD1q16:
3804  case ARM::VLD1q32:
3805  case ARM::VLD1q64:
3806  case ARM::VLD1q8wb_fixed:
3807  case ARM::VLD1q16wb_fixed:
3808  case ARM::VLD1q32wb_fixed:
3809  case ARM::VLD1q64wb_fixed:
3810  case ARM::VLD1q8wb_register:
3811  case ARM::VLD1q16wb_register:
3812  case ARM::VLD1q32wb_register:
3813  case ARM::VLD1q64wb_register:
3814  case ARM::VLD2d8:
3815  case ARM::VLD2d16:
3816  case ARM::VLD2d32:
3817  case ARM::VLD2q8:
3818  case ARM::VLD2q16:
3819  case ARM::VLD2q32:
3820  case ARM::VLD2d8wb_fixed:
3821  case ARM::VLD2d16wb_fixed:
3822  case ARM::VLD2d32wb_fixed:
3823  case ARM::VLD2q8wb_fixed:
3824  case ARM::VLD2q16wb_fixed:
3825  case ARM::VLD2q32wb_fixed:
3826  case ARM::VLD2d8wb_register:
3827  case ARM::VLD2d16wb_register:
3828  case ARM::VLD2d32wb_register:
3829  case ARM::VLD2q8wb_register:
3830  case ARM::VLD2q16wb_register:
3831  case ARM::VLD2q32wb_register:
3832  case ARM::VLD3d8:
3833  case ARM::VLD3d16:
3834  case ARM::VLD3d32:
3835  case ARM::VLD1d64T:
3836  case ARM::VLD3d8_UPD:
3837  case ARM::VLD3d16_UPD:
3838  case ARM::VLD3d32_UPD:
3839  case ARM::VLD1d64Twb_fixed:
3840  case ARM::VLD1d64Twb_register:
3841  case ARM::VLD3q8_UPD:
3842  case ARM::VLD3q16_UPD:
3843  case ARM::VLD3q32_UPD:
3844  case ARM::VLD4d8:
3845  case ARM::VLD4d16:
3846  case ARM::VLD4d32:
3847  case ARM::VLD1d64Q:
3848  case ARM::VLD4d8_UPD:
3849  case ARM::VLD4d16_UPD:
3850  case ARM::VLD4d32_UPD:
3851  case ARM::VLD1d64Qwb_fixed:
3852  case ARM::VLD1d64Qwb_register:
3853  case ARM::VLD4q8_UPD:
3854  case ARM::VLD4q16_UPD:
3855  case ARM::VLD4q32_UPD:
3856  case ARM::VLD1DUPq8:
3857  case ARM::VLD1DUPq16:
3858  case ARM::VLD1DUPq32:
3859  case ARM::VLD1DUPq8wb_fixed:
3860  case ARM::VLD1DUPq16wb_fixed:
3861  case ARM::VLD1DUPq32wb_fixed:
3862  case ARM::VLD1DUPq8wb_register:
3863  case ARM::VLD1DUPq16wb_register:
3864  case ARM::VLD1DUPq32wb_register:
3865  case ARM::VLD2DUPd8:
3866  case ARM::VLD2DUPd16:
3867  case ARM::VLD2DUPd32:
3868  case ARM::VLD2DUPd8wb_fixed:
3869  case ARM::VLD2DUPd16wb_fixed:
3870  case ARM::VLD2DUPd32wb_fixed:
3871  case ARM::VLD2DUPd8wb_register:
3872  case ARM::VLD2DUPd16wb_register:
3873  case ARM::VLD2DUPd32wb_register:
3874  case ARM::VLD4DUPd8:
3875  case ARM::VLD4DUPd16:
3876  case ARM::VLD4DUPd32:
3877  case ARM::VLD4DUPd8_UPD:
3878  case ARM::VLD4DUPd16_UPD:
3879  case ARM::VLD4DUPd32_UPD:
3880  case ARM::VLD1LNd8:
3881  case ARM::VLD1LNd16:
3882  case ARM::VLD1LNd32:
3883  case ARM::VLD1LNd8_UPD:
3884  case ARM::VLD1LNd16_UPD:
3885  case ARM::VLD1LNd32_UPD:
3886  case ARM::VLD2LNd8:
3887  case ARM::VLD2LNd16:
3888  case ARM::VLD2LNd32:
3889  case ARM::VLD2LNq16:
3890  case ARM::VLD2LNq32:
3891  case ARM::VLD2LNd8_UPD:
3892  case ARM::VLD2LNd16_UPD:
3893  case ARM::VLD2LNd32_UPD:
3894  case ARM::VLD2LNq16_UPD:
3895  case ARM::VLD2LNq32_UPD:
3896  case ARM::VLD4LNd8:
3897  case ARM::VLD4LNd16:
3898  case ARM::VLD4LNd32:
3899  case ARM::VLD4LNq16:
3900  case ARM::VLD4LNq32:
3901  case ARM::VLD4LNd8_UPD:
3902  case ARM::VLD4LNd16_UPD:
3903  case ARM::VLD4LNd32_UPD:
3904  case ARM::VLD4LNq16_UPD:
3905  case ARM::VLD4LNq32_UPD:
3906  // If the address is not 64-bit aligned, the latencies of these
3907  // instructions increase by one.
3908  ++Adjust;
3909  break;
3910  }
3911  }
3912  return Adjust;
3913 }
3914 
3915 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3916  const MachineInstr &DefMI,
3917  unsigned DefIdx,
3918  const MachineInstr &UseMI,
3919  unsigned UseIdx) const {
3920  // No operand latency. The caller may fall back to getInstrLatency.
3921  if (!ItinData || ItinData->isEmpty())
3922  return -1;
3923 
3924  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
3925  unsigned Reg = DefMO.getReg();
3926 
3927  const MachineInstr *ResolvedDefMI = &DefMI;
3928  unsigned DefAdj = 0;
3929  if (DefMI.isBundle())
3930  ResolvedDefMI =
3931  getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
3932  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
3933  ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
3934  return 1;
3935  }
3936 
3937  const MachineInstr *ResolvedUseMI = &UseMI;
3938  unsigned UseAdj = 0;
3939  if (UseMI.isBundle()) {
3940  ResolvedUseMI =
3941  getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
3942  if (!ResolvedUseMI)
3943  return -1;
3944  }
3945 
3946  return getOperandLatencyImpl(
3947  ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
3948  Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
3949 }
3950 
3951 int ARMBaseInstrInfo::getOperandLatencyImpl(
3952  const InstrItineraryData *ItinData, const MachineInstr &DefMI,
3953  unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
3954  const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
3955  unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
3956  if (Reg == ARM::CPSR) {
3957  if (DefMI.getOpcode() == ARM::FMSTAT) {
3958  // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
3959  return Subtarget.isLikeA9() ? 1 : 20;
3960  }
3961 
3962  // CPSR set and branch can be paired in the same cycle.
3963  if (UseMI.isBranch())
3964  return 0;
3965 
3966  // Otherwise it takes the instruction latency (generally one).
3967  unsigned Latency = getInstrLatency(ItinData, DefMI);
3968 
3969  // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
3970  // its uses. Instructions which are otherwise scheduled between them may
3971  // incur a code size penalty (not able to use the CPSR setting 16-bit
3972  // instructions).
3973  if (Latency > 0 && Subtarget.isThumb2()) {
3974  const MachineFunction *MF = DefMI.getParent()->getParent();
3975  // FIXME: Use Function::optForSize().
3976  if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
3977  --Latency;
3978  }
3979  return Latency;
3980  }
3981 
3982  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
3983  return -1;
3984 
3985  unsigned DefAlign = DefMI.hasOneMemOperand()
3986  ? (*DefMI.memoperands_begin())->getAlignment()
3987  : 0;
3988  unsigned UseAlign = UseMI.hasOneMemOperand()
3989  ? (*UseMI.memoperands_begin())->getAlignment()
3990  : 0;
3991 
3992  // Get the itinerary's latency if possible, and handle variable_ops.
3993  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
3994  UseIdx, UseAlign);
3995  // Unable to find operand latency. The caller may resort to getInstrLatency.
3996  if (Latency < 0)
3997  return Latency;
3998 
3999  // Adjust for IT block position.
4000  int Adj = DefAdj + UseAdj;
4001 
4002  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4003  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4004  if (Adj >= 0 || (int)Latency > -Adj) {
4005  return Latency + Adj;
4006  }
4007  // Return the itinerary latency, which may be zero but not less than zero.
4008  return Latency;
4009 }
4010 
4011 int
4012 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4013  SDNode *DefNode, unsigned DefIdx,
4014  SDNode *UseNode, unsigned UseIdx) const {
4015  if (!DefNode->isMachineOpcode())
4016  return 1;
4017 
4018  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4019 
4020  if (isZeroCost(DefMCID.Opcode))
4021  return 0;
4022 
4023  if (!ItinData || ItinData->isEmpty())
4024  return DefMCID.mayLoad() ? 3 : 1;
4025 
4026  if (!UseNode->isMachineOpcode()) {
4027  int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4028  int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4029  int Threshold = 1 + Adj;
4030  return Latency <= Threshold ? 1 : Latency - Adj;
4031  }
4032 
4033  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4034  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
4035  unsigned DefAlign = !DefMN->memoperands_empty()
4036  ? (*DefMN->memoperands_begin())->getAlignment() : 0;
4037  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
4038  unsigned UseAlign = !UseMN->memoperands_empty()
4039  ? (*UseMN->memoperands_begin())->getAlignment() : 0;
4040  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4041  UseMCID, UseIdx, UseAlign);
4042 
4043  if (Latency > 1 &&
4044  (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4045  Subtarget.isCortexA7())) {
4046  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4047  // variants are one cycle cheaper.
4048  switch (DefMCID.getOpcode()) {
4049  default: break;
4050  case ARM::LDRrs:
4051  case ARM::LDRBrs: {
4052  unsigned ShOpVal =
4053  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4054  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4055  if (ShImm == 0 ||
4056  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4057  --Latency;
4058  break;
4059  }
4060  case ARM::t2LDRs:
4061  case ARM::t2LDRBs:
4062  case ARM::t2LDRHs:
4063  case ARM::t2LDRSHs: {
4064  // Thumb2 mode: lsl only.
4065  unsigned ShAmt =
4066  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4067  if (ShAmt == 0 || ShAmt == 2)
4068  --Latency;
4069  break;
4070  }
4071  }
4072  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4073  // FIXME: Properly handle all of the latency adjustments for address
4074  // writeback.
4075  switch (DefMCID.getOpcode()) {
4076  default: break;
4077  case ARM::LDRrs:
4078  case ARM::LDRBrs: {
4079  unsigned ShOpVal =
4080  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4081  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4082  if (ShImm == 0 ||
4083  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4084  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4085  Latency -= 2;
4086  else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4087  --Latency;
4088  break;
4089  }
4090  case ARM::t2LDRs:
4091  case ARM::t2LDRBs:
4092  case ARM::t2LDRHs:
4093  case ARM::t2LDRSHs:
4094  // Thumb2 mode: lsl 0-3 only.
4095  Latency -= 2;
4096  break;
4097  }
4098  }
4099 
4100  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4101  switch (DefMCID.getOpcode()) {
4102  default: break;
4103  case ARM::VLD1q8:
4104  case ARM::VLD1q16:
4105  case ARM::VLD1q32:
4106  case ARM::VLD1q64:
4107  case ARM::VLD1q8wb_register:
4108  case ARM::VLD1q16wb_register:
4109  case ARM::VLD1q32wb_register:
4110  case ARM::VLD1q64wb_register:
4111  case ARM::VLD1q8wb_fixed:
4112  case ARM::VLD1q16wb_fixed:
4113  case ARM::VLD1q32wb_fixed:
4114  case ARM::VLD1q64wb_fixed:
4115  case ARM::VLD2d8:
4116  case ARM::VLD2d16:
4117  case ARM::VLD2d32:
4118  case ARM::VLD2q8Pseudo:
4119  case ARM::VLD2q16Pseudo:
4120  case ARM::VLD2q32Pseudo:
4121  case ARM::VLD2d8wb_fixed:
4122  case ARM::VLD2d16wb_fixed:
4123  case ARM::VLD2d32wb_fixed:
4124  case ARM::VLD2q8PseudoWB_fixed:
4125  case ARM::VLD2q16PseudoWB_fixed:
4126  case ARM::VLD2q32PseudoWB_fixed:
4127  case ARM::VLD2d8wb_register:
4128  case ARM::VLD2d16wb_register:
4129  case ARM::VLD2d32wb_register:
4130  case ARM::VLD2q8PseudoWB_register:
4131  case ARM::VLD2q16PseudoWB_register:
4132  case ARM::VLD2q32PseudoWB_register:
4133  case ARM::VLD3d8Pseudo:
4134  case ARM::VLD3d16Pseudo:
4135  case ARM::VLD3d32Pseudo:
4136  case ARM::VLD1d64TPseudo:
4137  case ARM::VLD1d64TPseudoWB_fixed:
4138  case ARM::VLD3d8Pseudo_UPD:
4139  case ARM::VLD3d16Pseudo_UPD:
4140  case ARM::VLD3d32Pseudo_UPD:
4141  case ARM::VLD3q8Pseudo_UPD:
4142  case ARM::VLD3q16Pseudo_UPD:
4143  case ARM::VLD3q32Pseudo_UPD:
4144  case ARM::VLD3q8oddPseudo:
4145  case ARM::VLD3q16oddPseudo:
4146  case ARM::VLD3q32oddPseudo:
4147  case ARM::VLD3q8oddPseudo_UPD:
4148  case ARM::VLD3q16oddPseudo_UPD:
4149  case ARM::VLD3q32oddPseudo_UPD:
4150  case ARM::VLD4d8Pseudo:
4151  case ARM::VLD4d16Pseudo:
4152  case ARM::VLD4d32Pseudo:
4153  case ARM::VLD1d64QPseudo:
4154  case ARM::VLD1d64QPseudoWB_fixed:
4155  case ARM::VLD4d8Pseudo_UPD:
4156  case ARM::VLD4d16Pseudo_UPD:
4157  case ARM::VLD4d32Pseudo_UPD:
4158  case ARM::VLD4q8Pseudo_UPD:
4159  case ARM::VLD4q16Pseudo_UPD:
4160  case ARM::VLD4q32Pseudo_UPD:
4161  case ARM::VLD4q8oddPseudo:
4162  case ARM::VLD4q16oddPseudo:
4163  case ARM::VLD4q32oddPseudo:
4164  case ARM::VLD4q8oddPseudo_UPD:
4165  case ARM::VLD4q16oddPseudo_UPD:
4166  case ARM::VLD4q32oddPseudo_UPD:
4167  case ARM::VLD1DUPq8:
4168  case ARM::VLD1DUPq16:
4169  case ARM::VLD1DUPq32:
4170  case ARM::VLD1DUPq8wb_fixed:
4171  case ARM::VLD1DUPq16wb_fixed:
4172  case ARM::VLD1DUPq32wb_fixed:
4173  case ARM::VLD1DUPq8wb_register:
4174  case ARM::VLD1DUPq16wb_register:
4175  case ARM::VLD1DUPq32wb_register:
4176  case ARM::VLD2DUPd8:
4177  case ARM::VLD2DUPd16:
4178  case ARM::VLD2DUPd32:
4179  case ARM::VLD2DUPd8wb_fixed:
4180  case ARM::VLD2DUPd16wb_fixed:
4181  case ARM::VLD2DUPd32wb_fixed:
4182  case ARM::VLD2DUPd8wb_register:
4183  case ARM::VLD2DUPd16wb_register:
4184  case ARM::VLD2DUPd32wb_register:
4185  case ARM::VLD4DUPd8Pseudo:
4186  case ARM::VLD4DUPd16Pseudo:
4187  case ARM::VLD4DUPd32Pseudo:
4188  case ARM::VLD4DUPd8Pseudo_UPD:
4189  case ARM::VLD4DUPd16Pseudo_UPD:
4190  case ARM::VLD4DUPd32Pseudo_UPD:
4191  case ARM::VLD1LNq8Pseudo:
4192  case ARM::VLD1LNq16Pseudo:
4193  case ARM::VLD1LNq32Pseudo:
4194  case ARM::VLD1LNq8Pseudo_UPD:
4195  case ARM::VLD1LNq16Pseudo_UPD:
4196  case ARM::VLD1LNq32Pseudo_UPD:
4197  case ARM::VLD2LNd8Pseudo:
4198  case ARM::VLD2LNd16Pseudo:
4199  case ARM::VLD2LNd32Pseudo:
4200  case ARM::VLD2LNq16Pseudo:
4201  case ARM::VLD2LNq32Pseudo:
4202  case ARM::VLD2LNd8Pseudo_UPD:
4203  case ARM::VLD2LNd16Pseudo_UPD:
4204  case ARM::VLD2LNd32Pseudo_UPD:
4205  case ARM::VLD2LNq16Pseudo_UPD:
4206  case ARM::VLD2LNq32Pseudo_UPD:
4207  case ARM::VLD4LNd8Pseudo:
4208  case ARM::VLD4LNd16Pseudo:
4209  case ARM::VLD4LNd32Pseudo:
4210  case ARM::VLD4LNq16Pseudo:
4211  case ARM::VLD4LNq32Pseudo:
4212  case ARM::VLD4LNd8Pseudo_UPD:
4213  case ARM::VLD4LNd16Pseudo_UPD:
4214  case ARM::VLD4LNd32Pseudo_UPD:
4215  case ARM::VLD4LNq16Pseudo_UPD:
4216  case ARM::VLD4LNq32Pseudo_UPD:
4217  // If the address is not 64-bit aligned, the latencies of these
4218  // instructions increase by one.
4219  ++Latency;
4220  break;
4221  }
4222 
4223  return Latency;
4224 }
4225 
4226 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4227  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4228  MI.isImplicitDef())
4229  return 0;
4230 
4231  if (MI.isBundle())
4232  return 0;
4233 
4234  const MCInstrDesc &MCID = MI.getDesc();
4235 
4236  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4237  !Subtarget.cheapPredicableCPSRDef())) {
4238  // When predicated, CPSR is an additional source operand for CPSR updating
4239  // instructions; this apparently increases their latencies.
4240  return 1;
4241  }
4242  return 0;
4243 }
4244 
4245 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4246  const MachineInstr &MI,
4247  unsigned *PredCost) const {
4248  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4249  MI.isImplicitDef())
4250  return 1;
4251 
4252  // An instruction scheduler typically runs on unbundled instructions, however
4253  // other passes may query the latency of a bundled instruction.
4254  if (MI.isBundle()) {
4255  unsigned Latency = 0;
4256  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4257  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4258  while (++I != E && I->isInsideBundle()) {
4259  if (I->getOpcode() != ARM::t2IT)
4260  Latency += getInstrLatency(ItinData, *I, PredCost);
4261  }
4262  return Latency;
4263  }
4264 
4265  const MCInstrDesc &MCID = MI.getDesc();
4266  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4267  !Subtarget.cheapPredicableCPSRDef()))) {
4268  // When predicated, CPSR is an additional source operand for CPSR updating
4269  // instructions; this apparently increases their latencies.
4270  *PredCost = 1;
4271  }
4272  // Be sure to call getStageLatency for an empty itinerary in case it has a
4273  // valid MinLatency property.
4274  if (!ItinData)
4275  return MI.mayLoad() ? 3 : 1;
4276 
4277  unsigned Class = MCID.getSchedClass();
4278 
4279  // For instructions with variable uops, use uops as latency.
4280  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4281  return getNumMicroOps(ItinData, MI);
4282 
4283  // For the common case, fall back on the itinerary's latency.
4284  unsigned Latency = ItinData->getStageLatency(Class);
4285 
4286  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4287  unsigned DefAlign =
4288  MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
4289  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4290  if (Adj >= 0 || (int)Latency > -Adj) {
4291  return Latency + Adj;
4292  }
4293  return Latency;
4294 }
4295 
4296 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4297  SDNode *Node) const {
4298  if (!Node->isMachineOpcode())
4299  return 1;
4300 
4301  if (!ItinData || ItinData->isEmpty())
4302  return 1;
4303 
4304  unsigned Opcode = Node->getMachineOpcode();
4305  switch (Opcode) {
4306  default:
4307  return ItinData->getStageLatency(get(Opcode).getSchedClass());
4308  case ARM::VLDMQIA:
4309  case ARM::VSTMQIA:
4310  return 2;
4311  }
4312 }
4313 
4314 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4315  const MachineRegisterInfo *MRI,
4316  const MachineInstr &DefMI,
4317  unsigned DefIdx,
4318  const MachineInstr &UseMI,
4319  unsigned UseIdx) const {
4320  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4321  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4322  if (Subtarget.nonpipelinedVFP() &&
4323  (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4324  return true;
4325 
4326  // Hoist VFP / NEON instructions with 4 or higher latency.
4327  unsigned Latency =
4328  SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4329  if (Latency <= 3)
4330  return false;
4331  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4332  UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4333 }
4334 
4335 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4336  const MachineInstr &DefMI,
4337  unsigned DefIdx) const {
4338  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4339  if (!ItinData || ItinData->isEmpty())
4340  return false;
4341 
4342  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4343  if (DDomain == ARMII::DomainGeneral) {
4344  unsigned DefClass = DefMI.getDesc().getSchedClass();
4345  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4346  return (DefCycle != -1 && DefCycle <= 2);
4347  }
4348  return false;
4349 }
4350 
4351 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4352  StringRef &ErrInfo) const {
4353  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4354  ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4355  return false;
4356  }
4357  return true;
4358 }
4359 
4360 // LoadStackGuard has so far only been implemented for MachO. A different code
4361 // sequence is needed for other targets.
4362 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4363  unsigned LoadImmOpc,
4364  unsigned LoadOpc) const {
4365  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4366  "ROPI/RWPI not currently supported with stack guard");
4367 
4368  MachineBasicBlock &MBB = *MI->getParent();
4369  DebugLoc DL = MI->getDebugLoc();
4370  unsigned Reg = MI->getOperand(0).getReg();
4371  const GlobalValue *GV =
4372  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4373  MachineInstrBuilder MIB;
4374 
4375  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4376  .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
4377 
4378  if (Subtarget.isGVIndirectSymbol(GV)) {
4379  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4380  MIB.addReg(Reg, RegState::Kill).addImm(0);
4381  auto Flags = MachineMemOperand::MOLoad |
4382  MachineMemOperand::MODereferenceable |
4383  MachineMemOperand::MOInvariant;
4384  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4385  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
4386  MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4387  }
4388 
4389  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4390  MIB.addReg(Reg, RegState::Kill)
4391  .addImm(0)
4392  .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
4393  .add(predOps(ARMCC::AL));
4394 }
4395 
4396 bool
4397 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4398  unsigned &AddSubOpc,
4399  bool &NegAcc, bool &HasLane) const {
4400  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4401  if (I == MLxEntryMap.end())
4402  return false;
4403 
4404  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4405  MulOpc = Entry.MulOpc;
4406  AddSubOpc = Entry.AddSubOpc;
4407  NegAcc = Entry.NegAcc;
4408  HasLane = Entry.HasLane;
4409  return true;
4410 }
4411 
4412 //===----------------------------------------------------------------------===//
4413 // Execution domains.
4414 //===----------------------------------------------------------------------===//
4415 //
4416 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4417 // and some can go down both. The vmov instructions go down the VFP pipeline,
4418 // but they can be changed to vorr equivalents that are executed by the NEON
4419 // pipeline.
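// For example, an unpredicated "vmov.f64 d0, d1" can be re-encoded below as
// "vorr d0, d1, d1", which computes the same value but executes in the NEON
// domain.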
4420 //
4421 // We use the following execution domain numbering:
4422 //
4423 enum {
4424  ExeGeneric = 0,
4425  ExeVFP = 1,
4426  ExeNEON = 2
4427 };
4428 
4429 //
4430 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4431 //
4432 std::pair<uint16_t, uint16_t>
4434  // If we don't have access to NEON instructions then we won't be able
4435  // to swizzle anything to the NEON domain. Check to make sure.
4436  if (Subtarget.hasNEON()) {
4437  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4438  // if they are not predicated.
4439  if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4440  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4441 
4442  // CortexA9 is particularly picky about mixing the two and wants these
4443  // converted.
4444  if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4445  (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4446  MI.getOpcode() == ARM::VMOVS))
4447  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4448  }
4449  // No other instructions can be swizzled, so just determine their domain.
4450  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4451 
4452  if (Domain & ARMII::DomainNEON)
4453  return std::make_pair(ExeNEON, 0);
4454 
4455  // Certain instructions can go either way on Cortex-A8.
4456  // Treat them as NEON instructions.
4457  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4458  return std::make_pair(ExeNEON, 0);
4459 
4460  if (Domain & ARMII::DomainVFP)
4461  return std::make_pair(ExeVFP, 0);
4462 
4463  return std::make_pair(ExeGeneric, 0);
4464 }
4465 
4466 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4467  unsigned SReg, unsigned &Lane) {
4468  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4469  Lane = 0;
4470 
4471  if (DReg != ARM::NoRegister)
4472  return DReg;
4473 
4474  Lane = 1;
4475  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4476 
4477  assert(DReg && "S-register with no D super-register?");
4478  return DReg;
4479 }
4480 
4481 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4482 /// set ImplicitSReg to a register number that must be marked as implicit-use or
4483 /// zero if no register needs to be defined as implicit-use.
4484 ///
4485 /// If the function cannot determine if an SPR should be marked implicit use or
4486 /// not, it returns false.
4487 ///
4488 /// This function handles cases where an instruction is being modified from taking
4489 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4490 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4491 /// lane of the DPR).
4492 ///
4493 /// If the other SPR is defined, an implicit-use of it should be added. Else,
4494 /// (including the case where the DPR itself is defined), it should not.
4495 ///
4496 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4497  MachineInstr &MI, unsigned DReg,
4498  unsigned Lane, unsigned &ImplicitSReg) {
4499  // If the DPR is defined or used already, the other SPR lane will be chained
4500  // correctly, so there is nothing to be done.
4501  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4502  ImplicitSReg = 0;
4503  return true;
4504  }
4505 
4506  // Otherwise we need to go searching to see if the SPR is set explicitly.
4507  ImplicitSReg = TRI->getSubReg(DReg,
4508  (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4509  MachineBasicBlock::LivenessQueryResult LQR =
4510  MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4511 
4512  if (LQR == MachineBasicBlock::LQR_Live)
4513  return true;
4514  else if (LQR == MachineBasicBlock::LQR_Unknown)
4515  return false;
4516 
4517  // If the register is known not to be live, there is no need to add an
4518  // implicit-use.
4519  ImplicitSReg = 0;
4520  return true;
4521 }
4522 
4523 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4524  unsigned Domain) const {
4525  unsigned DstReg, SrcReg, DReg;
4526  unsigned Lane;
4527  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4528  const TargetRegisterInfo *TRI = &getRegisterInfo();
4529  switch (MI.getOpcode()) {
4530  default:
4531  llvm_unreachable("cannot handle opcode!");
4532  break;
4533  case ARM::VMOVD:
4534  if (Domain != ExeNEON)
4535  break;
4536 
4537  // Zap the predicate operands.
4538  assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4539 
4540  // Make sure we've got NEON instructions.
4541  assert(Subtarget.hasNEON() && "VORRd requires NEON");
4542 
4543  // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4544  DstReg = MI.getOperand(0).getReg();
4545  SrcReg = MI.getOperand(1).getReg();
4546 
4547  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4548  MI.RemoveOperand(i - 1);
4549 
4550  // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4551  MI.setDesc(get(ARM::VORRd));
4552  MIB.addReg(DstReg, RegState::Define)
4553  .addReg(SrcReg)
4554  .addReg(SrcReg)
4555  .add(predOps(ARMCC::AL));
4556  break;
4557  case ARM::VMOVRS:
4558  if (Domain != ExeNEON)
4559  break;
4560  assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
4561 
4562  // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
4563  DstReg = MI.getOperand(0).getReg();
4564  SrcReg = MI.getOperand(1).getReg();
4565 
4566  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4567  MI.RemoveOperand(i - 1);
4568 
4569  DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
4570 
4571  // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
4572  // Note that DSrc has been widened and the other lane may be undef, which
4573  // contaminates the entire register.
4574  MI.setDesc(get(ARM::VGETLNi32));
4575  MIB.addReg(DstReg, RegState::Define)
4576  .addReg(DReg, RegState::Undef)
4577  .addImm(Lane)
4578  .add(predOps(ARMCC::AL));
4579 
4580  // The old source should be an implicit use, otherwise we might think it
4581  // was dead before here.
4582  MIB.addReg(SrcReg, RegState::Implicit);
4583  break;
4584  case ARM::VMOVSR: {
4585  if (Domain != ExeNEON)
4586  break;
4587  assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
4588 
4589  // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
4590  DstReg = MI.getOperand(0).getReg();
4591  SrcReg = MI.getOperand(1).getReg();
4592 
4593  DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
4594 
4595  unsigned ImplicitSReg;
4596  if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
4597  break;
4598 
4599  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4600  MI.RemoveOperand(i - 1);
4601 
4602  // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
4603  // Again DDst may be undefined at the beginning of this instruction.
4604  MI.setDesc(get(ARM::VSETLNi32));
4605  MIB.addReg(DReg, RegState::Define)
4606  .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
4607  .addReg(SrcReg)
4608  .addImm(Lane)
4609  .add(predOps(ARMCC::AL));
4610 
4611  // The narrower destination must be marked as set to keep previous chains
4612  // in place.
4613  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4614  if (ImplicitSReg != 0)
4615  MIB.addReg(ImplicitSReg, RegState::Implicit);
4616  break;
4617  }
4618  case ARM::VMOVS: {
4619  if (Domain != ExeNEON)
4620  break;
4621 
4622  // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4623  DstReg = MI.getOperand(0).getReg();
4624  SrcReg = MI.getOperand(1).getReg();
4625 
4626  unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4627  DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4628  DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4629 
4630  unsigned ImplicitSReg;
4631  if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4632  break;
4633 
4634  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4635  MI.RemoveOperand(i - 1);
4636 
4637  if (DSrc == DDst) {
4638  // Destination can be:
4639  // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4640  MI.setDesc(get(ARM::VDUPLN32d));
4641  MIB.addReg(DDst, RegState::Define)
4642  .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
4643  .addImm(SrcLane)
4644  .add(predOps(ARMCC::AL));
4645 
4646  // Neither the source nor the destination is naturally represented any
4647  // more, so add them in manually.
4648  MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4649  MIB.addReg(SrcReg, RegState::Implicit);
4650  if (ImplicitSReg != 0)
4651  MIB.addReg(ImplicitSReg, RegState::Implicit);
4652  break;
4653  }
4654 
4655  // In general there's no single instruction that can perform an S <-> S
4656  // move in NEON space, but a pair of VEXT instructions *can* do the
4657  // job. It turns out that the VEXTs needed will only use DSrc once, with
4658  // the position based purely on the combination of lane-0 and lane-1
4659  // involved. For example
4660  // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
4661  // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
4662  // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
4663  // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
4664  //
4665  // Pattern of the MachineInstrs is:
4666  // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4667  MachineInstrBuilder NewMIB;
4668  NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
4669  DDst);
4670 
4671  // On the first instruction, both DSrc and DDst may be <undef> if present.
4672  // Specifically when the original instruction didn't have them as an
4673  // <imp-use>.
4674  unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
4675  bool CurUndef = !MI.readsRegister(CurReg, TRI);
4676  NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4677 
4678  CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
4679  CurUndef = !MI.readsRegister(CurReg, TRI);
4680  NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
4681  .addImm(1)
4682  .add(predOps(ARMCC::AL));
4683 
4684  if (SrcLane == DstLane)
4685  NewMIB.addReg(SrcReg, RegState::Implicit);
4686 
4687  MI.setDesc(get(ARM::VEXTd32));
4688  MIB.addReg(DDst, RegState::Define);
4689 
4690  // On the second instruction, DDst has definitely been defined above, so
4691  // it is not <undef>. DSrc, if present, can be <undef> as above.
4692  CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
4693  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4694  MIB.addReg(CurReg, getUndefRegState(CurUndef));
4695 
4696  CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
4697  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4698  MIB.addReg(CurReg, getUndefRegState(CurUndef))
4699  .addImm(1)
4700  .add(predOps(ARMCC::AL));
4701 
4702  if (SrcLane != DstLane)
4703  MIB.addReg(SrcReg, RegState::Implicit);
4704 
4705  // As before, the original destination is no longer represented, add it
4706  // implicitly.
4707  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4708  if (ImplicitSReg != 0)
4709  MIB.addReg(ImplicitSReg, RegState::Implicit);
4710  break;
4711  }
4712  }
4713 }
4714 
4715 //===----------------------------------------------------------------------===//
4716 // Partial register updates
4717 //===----------------------------------------------------------------------===//
4718 //
4719 // Swift renames NEON registers with 64-bit granularity. That means any
4720 // instruction writing an S-reg implicitly reads the containing D-reg. The
4721 // problem is mostly avoided by translating f32 operations to v2f32 operations
4722 // on D-registers, but f32 loads are still a problem.
4723 //
4724 // These instructions can load an f32 into a NEON register:
4725 //
4726 // VLDRS - Only writes S, partial D update.
4727 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4728 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4729 //
4730 // FCONSTD can be used as a dependency-breaking instruction.
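// For example, "vldr s1, [r0]" writes only half of d0, so with 64-bit renaming
// it carries a false dependence on the previous value of d0; defining d0 with
// FCONSTD just before the load breaks that chain.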
4731 unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
4732  const MachineInstr &MI, unsigned OpNum,
4733  const TargetRegisterInfo *TRI) const {
4734  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
4735  if (!PartialUpdateClearance)
4736  return 0;
4737 
4738  assert(TRI && "Need TRI instance");
4739 
4740  const MachineOperand &MO = MI.getOperand(OpNum);
4741  if (MO.readsReg())
4742  return 0;
4743  unsigned Reg = MO.getReg();
4744  int UseOp = -1;
4745 
4746  switch (MI.getOpcode()) {
4747  // Normal instructions writing only an S-register.
4748  case ARM::VLDRS:
4749  case ARM::FCONSTS:
4750  case ARM::VMOVSR:
4751  case ARM::VMOVv8i8:
4752  case ARM::VMOVv4i16:
4753  case ARM::VMOVv2i32:
4754  case ARM::VMOVv2f32:
4755  case ARM::VMOVv1i64:
4756  UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
4757  break;
4758 
4759  // Explicitly reads the dependency.
4760  case ARM::VLD1LNd32:
4761  UseOp = 3;
4762  break;
4763  default:
4764  return 0;
4765  }
4766 
4767  // If this instruction actually reads a value from Reg, there is no unwanted
4768  // dependency.
4769  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
4770  return 0;
4771 
4772  // We must be able to clobber the whole D-reg.
4773  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4774  // Virtual register must be a foo:ssub_0<def,undef> operand.
4775  if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
4776  return 0;
4777  } else if (ARM::SPRRegClass.contains(Reg)) {
4778  // Physical register: MI must define the full D-reg.
4779  unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4780  &ARM::DPRRegClass);
4781  if (!DReg || !MI.definesRegister(DReg, TRI))
4782  return 0;
4783  }
4784 
4785  // MI has an unwanted D-register dependency.
4786  // Avoid defs in the previous N instructions.
4787  return PartialUpdateClearance;
4788 }
4789 
4790 // Break a partial register dependency after getPartialRegUpdateClearance
4791 // returned non-zero.
4792 void ARMBaseInstrInfo::breakPartialRegDependency(
4793  MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
4794  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
4795  assert(TRI && "Need TRI instance");
4796 
4797  const MachineOperand &MO = MI.getOperand(OpNum);
4798  unsigned Reg = MO.getReg();
4799  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
4800  "Can't break virtual register dependencies.");
4801  unsigned DReg = Reg;
4802 
4803  // If MI defines an S-reg, find the corresponding D super-register.
4804  if (ARM::SPRRegClass.contains(Reg)) {
4805  DReg = ARM::D0 + (Reg - ARM::S0) / 2;
4806  assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
4807  }
4808 
4809  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
4810  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
4811 
4812  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
4813  // the full D-register by loading the same value to both lanes. The
4814  // instruction is micro-coded with 2 uops, so don't do this until we can
4815  // properly schedule micro-coded instructions. The dispatcher stalls cause
4816  // too big regressions.
4817 
4818  // Insert the dependency-breaking FCONSTD before MI.
4819  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
4820  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
4821  .addImm(96)
4822  .add(predOps(ARMCC::AL));
4823  MI.addRegisterKilled(DReg, TRI, true);
4824 }
4825 
4826 bool ARMBaseInstrInfo::hasNOP() const {
4827  return Subtarget.getFeatureBits()[ARM::HasV6KOps];
4828 }
4829 
4830 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
4831  if (MI->getNumOperands() < 4)
4832  return true;
4833  unsigned ShOpVal = MI->getOperand(3).getImm();
4834  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
4835  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
4836  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
4837  ((ShImm == 1 || ShImm == 2) &&
4838  ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
4839  return true;
4840 
4841  return false;
4842 }
4843 
4844 bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
4845  const MachineInstr &MI, unsigned DefIdx,
4846  SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
4847  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4848  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
4849 
4850  switch (MI.getOpcode()) {
4851  case ARM::VMOVDRR:
4852  // dX = VMOVDRR rY, rZ
4853  // is the same as:
4854  // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
4855  // Populate the InputRegs accordingly.
4856  // rY
4857  const MachineOperand *MOReg = &MI.getOperand(1);
4858  InputRegs.push_back(
4859  RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0));
4860  // rZ
4861  MOReg = &MI.getOperand(2);
4862  InputRegs.push_back(
4863  RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1));
4864  return true;
4865  }
4866  llvm_unreachable("Target dependent opcode missing");
4867 }
4868 
4869 bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
4870  const MachineInstr &MI, unsigned DefIdx,
4871  RegSubRegPairAndIdx &InputReg) const {
4872  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4873  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
4874 
4875  switch (MI.getOpcode()) {
4876  case ARM::VMOVRRD:
4877  // rX, rY = VMOVRRD dZ
4878  // is the same as:
4879  // rX = EXTRACT_SUBREG dZ, ssub_0
4880  // rY = EXTRACT_SUBREG dZ, ssub_1
4881  const MachineOperand &MOReg = MI.getOperand(2);
4882  InputReg.Reg = MOReg.getReg();
4883  InputReg.SubReg = MOReg.getSubReg();
4884  InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
4885  return true;
4886  }
4887  llvm_unreachable("Target dependent opcode missing");
4888 }
4889 
4891  const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
4892  RegSubRegPairAndIdx &InsertedReg) const {
4893  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4894  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
4895 
4896  switch (MI.getOpcode()) {
4897  case ARM::VSETLNi32:
4898  // dX = VSETLNi32 dY, rZ, imm
4899  const MachineOperand &MOBaseReg = MI.getOperand(1);
4900  const MachineOperand &MOInsertedReg = MI.getOperand(2);
4901  const MachineOperand &MOIndex = MI.getOperand(3);
4902  BaseReg.Reg = MOBaseReg.getReg();
4903  BaseReg.SubReg = MOBaseReg.getSubReg();
4904 
4905  InsertedReg.Reg = MOInsertedReg.getReg();
4906  InsertedReg.SubReg = MOInsertedReg.getSubReg();
4907  InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
4908  return true;
4909  }
4910  llvm_unreachable("Target dependent opcode missing");
4911 }
bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const
MachineConstantPoolValue * MachineCPVal
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override
bool getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const override
Build the equivalent inputs of a INSERT_SUBREG for the given MI and DefIdx.
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register...
Definition: MachineInstr.h:965
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
bool checkVLDnAccessAlignment() const
Definition: ARMSubtarget.h:576
BranchProbability getCompl() const
int findFirstPredOperandIdx() const
Find the index of the first operand in the operand list that is used to represent the predicate...
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:458
instr_iterator instr_end()
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned getT2SOImmTwoPartFirst(unsigned Imm)
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isThumb() const
Definition: ARMSubtarget.h:672
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool DefinesPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred) const override
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been <def>ined and not <kill>ed as of just before Before...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
unsigned getRegister(unsigned i) const
Return the specified register in the class.
bool isExtractSubregLike(QueryType Type=IgnoreBundle) const
Return true if this instruction behaves the same way as the generic EXTRACT_SUBREG instructions...
Definition: MachineInstr.h:601
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:268
ARMConstantPoolValue - ARM specific constantpool value.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
bool expandPostRAPseudo(MachineInstr &MI) const override
void setIsUndef(bool Val=true)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, int CmpMask, bool CommonUse)
isSuitableForMask - Identify a suitable &#39;and&#39; instruction that operates on the given source register ...
bool isPredicated(const MachineInstr &MI) const override
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore...
unsigned getSubReg() const
bool isPredicable(QueryType Type=AllInBundle) const
Return true if this instruction has a predicate operand that controls execution.
Definition: MachineInstr.h:512
bool isV8EligibleForIT(const InstrType *Instr)
Definition: ARMFeatures.h:25
static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, unsigned SReg, unsigned &Lane)
bool isRegSequence() const
Definition: MachineInstr.h:849
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:262
bool isSOImmTwoPartVal(unsigned V)
isSOImmTwoPartVal - Return true if the specified value can be obtained by or&#39;ing together two SOImmVa...
VarInfo - This represents the regions where a virtual register is live in the program.
Definition: LiveVariables.h:79
A debug info location.
Definition: DebugLoc.h:34
void setIsDead(bool Val=true)
unsigned getPartialRegUpdateClearance(const MachineInstr &, unsigned, const TargetRegisterInfo *) const override
setjmp/longjmp based exceptions
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
bool mayLoad() const
Return true if this instruction could possibly read memory.
Definition: MCInstrDesc.h:387
unsigned getSOImmTwoPartSecond(unsigned V)
getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, return the second chunk of ...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:332
bool isCopyLike() const
Return true if the instruction behaves like a copy.
Definition: MachineInstr.h:871
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
static bool isThumb(const MCSubtargetInfo &STI)
union llvm::MachineConstantPoolEntry::@140 Val
The constant itself.
unsigned getSpillSize(const TargetRegisterClass &RC) const
Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1)
return AArch64::GPR64RegClass contains(Reg)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool removeKill(MachineInstr &MI)
removeKill - Delete a kill corresponding to the specified machine instruction.
Definition: LiveVariables.h:94
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
void clearKillInfo()
Clears kill flags on all operands.
static bool isCPSRDefined(const MachineInstr &MI)
Can load/store 1 register/cycle.
Definition: ARMSubtarget.h:123
static uint32_t getAlignment(const MCSectionCOFF &Sec)
bool isFpMLxInstruction(unsigned Opcode) const
isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS instruction.
A description of a memory reference used in the backend.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
ShiftOpc getAM2ShiftOpc(unsigned AM2Opc)
unsigned getSOImmValRotate(unsigned Imm)
getSOImmValRotate - Try to handle Imm with an immediate shifter operand, computing the rotate amount ...
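A quick illustration of the rotate computation described above (a sketch, again assuming the ARM_AM namespace from ARMAddressingModes.h):
  // 0x0000FF00 is the 8-bit value 0xFF rotated right by 24 bits, so the
  // helper is expected to report a rotate amount of 24 for this input.
  unsigned RotAmt = ARM_AM::getSOImmValRotate(0x0000FF00);  // expected: 24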
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:293
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
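A minimal caller-side sketch for analyzeCompare (TII and MI are illustrative names; this mirrors how peephole-style passes usually consume the hook rather than any specific code in this file):
  unsigned SrcReg = 0, SrcReg2 = 0;
  int CmpMask = 0, CmpValue = 0;
  // For a CMP/TST-like instruction, recover what is being compared.
  if (TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue)) {
    // On success the compared register(s) are in SrcReg/SrcReg2; for
    // register-vs-immediate forms the immediate is returned in CmpValue.
  }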
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void setImplicit(bool Val=true)
static bool isLoad(int Opcode)
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII)
rewriteARMFrameIndex / rewriteT2FrameIndex - Rewrite MI to access 'Offset' bytes from the FP...
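A sketch of how this helper is typically driven during frame-index elimination (FIOperandNum, FrameReg and Offset are illustrative locals, and TII here denotes a const ARMBaseInstrInfo reference; this is not a verbatim excerpt):
  // Try to fold the frame offset directly into the instruction's operands.
  bool Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
  if (!Done) {
    // Offset now holds the residual that did not fit; it has to be
    // materialized into a scratch register before the access is legal.
  }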
struct llvm::MachineOperand::@143::@145 Reg
Reg
All possible values of the reg field in the ModR/M byte.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
Definition: MachineInstr.h:474
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
const MachineInstrBuilder & AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI) const
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const
The memory access is dereferenceable (i.e., doesn't trap).
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
const InstrItineraryData * getInstrItineraries() const
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to determine (in conjunction w...
const char * getSymbolName() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
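A sketch of allocating a memory operand for a stack-slot store, roughly in the style of the spill helpers in this file (a sketch, assuming MachinePointerInfo::getFixedStack and the MachineFrameInfo query names of this release):
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIndex),
      MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex),
      MFI.getObjectAlignment(FrameIndex));
  // The MMO is then attached to the spill instruction, e.g. via addMemOperand().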
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
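A sketch of the remove/insert-branch protocol that reverseBranchCondition, removeBranch and insertBranch participate in (analyzeBranch is assumed from TargetInstrInfo; the control flow shown is illustrative):
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  // Only rewrite a fully analyzable conditional/unconditional terminator pair.
  if (!TII->analyzeBranch(MBB, TBB, FBB, Cond) && TBB && FBB &&
      !TII->reverseBranchCondition(Cond)) {
    TII->removeBranch(MBB);
    // Re-emit with the branch sense inverted: conditionally to FBB, then
    // unconditionally to TBB.
    TII->insertBranch(MBB, FBB, TBB, Cond, DL);
  }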
bool analyzeSelect(const MachineInstr &MI, SmallVectorImpl< MachineOperand > &Cond, unsigned &TrueOp, unsigned &FalseOp, bool &Optimizable) const override
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:634
bool hasVFP2() const
Definition: ARMSubtarget.h:529
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
void RemoveOperand(unsigned i)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, MachineRegisterInfo *MRI) const override
FoldImmediate - 'Reg' is known to be defined by a move immediate instruction, try to fold the immedia...
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:287
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
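A usage sketch for predOps (DestReg, SrcReg and the insertion point are illustrative; condCodeOp() is the companion helper for the optional CPSR-def operand and is assumed here, as it is not listed in this index):
  // Emit an always-executed ARM register move with the standard predicate
  // operands appended (condition-code immediate + predicate register).
  BuildMI(MBB, InsertPt, DL, TII->get(ARM::MOVr), DestReg)
      .addReg(SrcReg)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());  // 's' bit unused: no CPSR def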
bool isBundle() const
Definition: MachineInstr.h:853
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
getSwappedCondition - assume the flags are set by MI(a,b), return the condition code if we modify the...
unsigned char getAM3Offset(unsigned AM3Opc)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
Can load/store 2 registers/cycle, but needs an extra cycle if the access is not 64-bit aligned...
Definition: ARMSubtarget.h:121
unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr &MI) const override
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Itinerary data supplied by a subtarget to be used by a target.
iterator getLastNonDebugInstr()
Returns an iterator to the last non-debug instruction in the basic block, or end().
const ARMSubtarget & getSubtarget() const
int getNumMicroOps(unsigned ItinClassIndx) const
Return the number of micro-ops that the given class decodes to.
This class is a data container for one entry in a MachineConstantPool.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:916
static const ARM_MLxEntry ARM_MLxTable[]
unsigned getMatchingCondBranchOpcode(unsigned Opc)
ARM_MLxEntry - Record information about MLA / MLS instructions.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
unsigned getUndefRegState(bool B)
virtual const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const =0
Return a null-terminated list of all of the callee-saved registers on this target.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getStageLatency(unsigned ItinClassIndx) const
Return the total stage latency of the given class.
Expected< const typename ELFT::Sym * > getSymbol(typename ELFT::SymRange Symbols, uint32_t Index)
Definition: ELF.h:249
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
reverse_iterator rbegin()
bool isBranch(QueryType Type=AnyInBundle) const
Ret