LLVM  7.0.0svn
ARMBaseInstrInfo.cpp
1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the Base ARM implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMConstantPoolValue.h"
17 #include "ARMFeatures.h"
18 #include "ARMHazardRecognizer.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "MCTargetDesc/ARMBaseInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Triple.h"
28 #include "llvm/CodeGen/LiveVariables.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineConstantPool.h"
31 #include "llvm/CodeGen/MachineFrameInfo.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineInstrBuilder.h"
35 #include "llvm/CodeGen/MachineMemOperand.h"
36 #include "llvm/CodeGen/MachineOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
39 #include "llvm/CodeGen/SelectionDAGNodes.h"
40 #include "llvm/CodeGen/TargetInstrInfo.h"
41 #include "llvm/CodeGen/TargetRegisterInfo.h"
42 #include "llvm/CodeGen/TargetSchedule.h"
43 #include "llvm/IR/Attributes.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DebugLoc.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/GlobalValue.h"
48 #include "llvm/MC/MCAsmInfo.h"
49 #include "llvm/MC/MCInstrDesc.h"
50 #include "llvm/MC/MCInstrItineraries.h"
51 #include "llvm/Support/BranchProbability.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/ErrorHandling.h"
57 #include "llvm/Support/MathExtras.h"
58 #include "llvm/Support/raw_ostream.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <iterator>
63 #include <new>
64 #include <utility>
65 #include <vector>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "arm-instrinfo"
70 
71 #define GET_INSTRINFO_CTOR_DTOR
72 #include "ARMGenInstrInfo.inc"
73 
74 static cl::opt<bool>
75 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
76  cl::desc("Enable ARM 2-addr to 3-addr conv"));
77 
78 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
79 struct ARM_MLxEntry {
80  uint16_t MLxOpc; // MLA / MLS opcode
81  uint16_t MulOpc; // Expanded multiplication opcode
82  uint16_t AddSubOpc; // Expanded add / sub opcode
83  bool NegAcc; // True if the acc is negated before the add / sub.
84  bool HasLane; // True if instruction has an extra "lane" operand.
85 };
86 
87 static const ARM_MLxEntry ARM_MLxTable[] = {
88  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
89  // fp scalar ops
90  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
91  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
92  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
93  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
94  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
95  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
96  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
97  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
98 
99  // fp SIMD ops
100  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
101  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
102  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
103  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
104  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
105  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
106  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
107  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
108 };
109 
110 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
111   : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
112  Subtarget(STI) {
113  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
114  if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
115  llvm_unreachable("Duplicated entries?");
116  MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
117  MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
118  }
119 }
120 
121 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfo
122 // currently defaults to no prepass hazard recognizer.
123 ScheduleHazardRecognizer *
124 ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
125                                                const ScheduleDAG *DAG) const {
126  if (usePreRAHazardRecognizer()) {
127  const InstrItineraryData *II =
128  static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
129  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
130  }
131  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
132 }
133 
134 ScheduleHazardRecognizer *ARMBaseInstrInfo::
135 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
136                                    const ScheduleDAG *DAG) const {
137  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
138  return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
139  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
140 }
141 
142 MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
143     MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
144  // FIXME: Thumb2 support.
145 
146  if (!EnableARM3Addr)
147  return nullptr;
148 
149  MachineFunction &MF = *MI.getParent()->getParent();
150  uint64_t TSFlags = MI.getDesc().TSFlags;
151  bool isPre = false;
152  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
153  default: return nullptr;
154  case ARMII::IndexModePre:
155  isPre = true;
156  break;
157  case ARMII::IndexModePost:
158   break;
159  }
160 
161  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
162  // operation.
163  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
164  if (MemOpc == 0)
165  return nullptr;
166 
167  MachineInstr *UpdateMI = nullptr;
168  MachineInstr *MemMI = nullptr;
169  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
170  const MCInstrDesc &MCID = MI.getDesc();
171  unsigned NumOps = MCID.getNumOperands();
172  bool isLoad = !MI.mayStore();
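  // Note: the operand accesses below assume the standard layout of ARM
  // pre/post-indexed loads and stores: the write-back base definition, the
  // base register, a register offset, the addressing-mode immediate, and the
  // trailing predicate operand.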
173  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
174  const MachineOperand &Base = MI.getOperand(2);
175  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
176  unsigned WBReg = WB.getReg();
177  unsigned BaseReg = Base.getReg();
178  unsigned OffReg = Offset.getReg();
179  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
180  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
181  switch (AddrMode) {
182  default: llvm_unreachable("Unknown indexed op!");
183  case ARMII::AddrMode2: {
184  bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
185  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
186  if (OffReg == 0) {
187  if (ARM_AM::getSOImmVal(Amt) == -1)
188  // Can't encode it in a so_imm operand. This transformation will
189  // add more than 1 instruction. Abandon!
190  return nullptr;
191  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
192  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
193  .addReg(BaseReg)
194  .addImm(Amt)
195  .add(predOps(Pred))
196  .add(condCodeOp());
197  } else if (Amt != 0) {
198  ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
199  unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
200  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
201  get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
202  .addReg(BaseReg)
203  .addReg(OffReg)
204  .addReg(0)
205  .addImm(SOOpc)
206  .add(predOps(Pred))
207  .add(condCodeOp());
208  } else
209  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
210  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
211  .addReg(BaseReg)
212  .addReg(OffReg)
213  .add(predOps(Pred))
214  .add(condCodeOp());
215  break;
216  }
217  case ARMII::AddrMode3 : {
218  bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
219  unsigned Amt = ARM_AM::getAM3Offset(OffImm);
220  if (OffReg == 0)
221  // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
222  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
223  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
224  .addReg(BaseReg)
225  .addImm(Amt)
226  .add(predOps(Pred))
227  .add(condCodeOp());
228  else
229  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
230  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
231  .addReg(BaseReg)
232  .addReg(OffReg)
233  .add(predOps(Pred))
234  .add(condCodeOp());
235  break;
236  }
237  }
238 
239  std::vector<MachineInstr*> NewMIs;
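  // NewMIs is filled in reverse program order: for a pre-indexed form the base
  // update must execute before the memory access, while for a post-indexed
  // form the access happens first and the base is updated afterwards.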
240  if (isPre) {
241  if (isLoad)
242  MemMI =
243  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
244  .addReg(WBReg)
245  .addImm(0)
246  .addImm(Pred);
247  else
248  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
249  .addReg(MI.getOperand(1).getReg())
250  .addReg(WBReg)
251  .addReg(0)
252  .addImm(0)
253  .addImm(Pred);
254  NewMIs.push_back(MemMI);
255  NewMIs.push_back(UpdateMI);
256  } else {
257  if (isLoad)
258  MemMI =
259  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
260  .addReg(BaseReg)
261  .addImm(0)
262  .addImm(Pred);
263  else
264  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
265  .addReg(MI.getOperand(1).getReg())
266  .addReg(BaseReg)
267  .addReg(0)
268  .addImm(0)
269  .addImm(Pred);
270  if (WB.isDead())
271  UpdateMI->getOperand(0).setIsDead();
272  NewMIs.push_back(UpdateMI);
273  NewMIs.push_back(MemMI);
274  }
275 
276  // Transfer LiveVariables states, kill / dead info.
277  if (LV) {
278  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
279  MachineOperand &MO = MI.getOperand(i);
280   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
281    unsigned Reg = MO.getReg();
282 
283    LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284    if (MO.isDef()) {
285  MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
286  if (MO.isDead())
287  LV->addVirtualRegisterDead(Reg, *NewMI);
288  }
289  if (MO.isUse() && MO.isKill()) {
290  for (unsigned j = 0; j < 2; ++j) {
291  // Look at the two new MI's in reverse order.
292  MachineInstr *NewMI = NewMIs[j];
293  if (!NewMI->readsRegister(Reg))
294  continue;
295  LV->addVirtualRegisterKilled(Reg, *NewMI);
296  if (VI.removeKill(MI))
297  VI.Kills.push_back(NewMI);
298  break;
299  }
300  }
301  }
302  }
303  }
304 
305  MachineBasicBlock::iterator MBBI = MI.getIterator();
306  MFI->insert(MBBI, NewMIs[1]);
307  MFI->insert(MBBI, NewMIs[0]);
308  return NewMIs[0];
309 }
310 
311 // Branch analysis.
312 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
313                                      MachineBasicBlock *&TBB,
314  MachineBasicBlock *&FBB,
315                                      SmallVectorImpl<MachineOperand> &Cond,
316                                      bool AllowModify) const {
317  TBB = nullptr;
318  FBB = nullptr;
319 
320  MachineBasicBlock::iterator I = MBB.end();
321  if (I == MBB.begin())
322  return false; // Empty blocks are easy.
323  --I;
324 
325  // Walk backwards from the end of the basic block until the branch is
326  // analyzed or we give up.
327  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
328  // Flag to be raised on unanalyzable instructions. This is useful in cases
329  // where we want to clean up at the end of the basic block before we bail
330  // out.
331  bool CantAnalyze = false;
332 
333  // Skip over DEBUG values and predicated nonterminators.
334  while (I->isDebugInstr() || !I->isTerminator()) {
335  if (I == MBB.begin())
336  return false;
337  --I;
338  }
339 
340  if (isIndirectBranchOpcode(I->getOpcode()) ||
341  isJumpTableBranchOpcode(I->getOpcode())) {
342  // Indirect branches and jump tables can't be analyzed, but we still want
343  // to clean up any instructions at the tail of the basic block.
344  CantAnalyze = true;
345  } else if (isUncondBranchOpcode(I->getOpcode())) {
346  TBB = I->getOperand(0).getMBB();
347  } else if (isCondBranchOpcode(I->getOpcode())) {
348  // Bail out if we encounter multiple conditional branches.
349  if (!Cond.empty())
350  return true;
351 
352  assert(!FBB && "FBB should have been null.");
353  FBB = TBB;
354  TBB = I->getOperand(0).getMBB();
355  Cond.push_back(I->getOperand(1));
356  Cond.push_back(I->getOperand(2));
357  } else if (I->isReturn()) {
358  // Returns can't be analyzed, but we should run cleanup.
359  CantAnalyze = !isPredicated(*I);
360  } else {
361  // We encountered an unrecognized terminator. Bail out immediately.
362  return true;
363  }
364 
365  // Cleanup code - to be run for unpredicated unconditional branches and
366  // returns.
367  if (!isPredicated(*I) &&
368  (isUncondBranchOpcode(I->getOpcode()) ||
369  isIndirectBranchOpcode(I->getOpcode()) ||
370  isJumpTableBranchOpcode(I->getOpcode()) ||
371  I->isReturn())) {
372  // Forget any previous conditional branch information - it no longer applies.
373  Cond.clear();
374  FBB = nullptr;
375 
376  // If we can modify the function, delete everything below this
377  // unconditional branch.
378  if (AllowModify) {
379  MachineBasicBlock::iterator DI = std::next(I);
380  while (DI != MBB.end()) {
381  MachineInstr &InstToDelete = *DI;
382  ++DI;
383  InstToDelete.eraseFromParent();
384  }
385  }
386  }
387 
388  if (CantAnalyze)
389  return true;
390 
391  if (I == MBB.begin())
392  return false;
393 
394  --I;
395  }
396 
397  // We made it past the terminators without bailing out - we must have
398  // analyzed this branch successfully.
399  return false;
400 }
401 
402 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
403                                         int *BytesRemoved) const {
404  assert(!BytesRemoved && "code size not handled");
405 
406  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
407  if (I == MBB.end())
408  return 0;
409 
410  if (!isUncondBranchOpcode(I->getOpcode()) &&
411  !isCondBranchOpcode(I->getOpcode()))
412  return 0;
413 
414  // Remove the branch.
415  I->eraseFromParent();
416 
417  I = MBB.end();
418 
419  if (I == MBB.begin()) return 1;
420  --I;
421  if (!isCondBranchOpcode(I->getOpcode()))
422  return 1;
423 
424  // Remove the branch.
425  I->eraseFromParent();
426  return 2;
427 }
428 
429 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
430                                         MachineBasicBlock *TBB,
431  MachineBasicBlock *FBB,
432                                         ArrayRef<MachineOperand> Cond,
433                                         const DebugLoc &DL,
434  int *BytesAdded) const {
435  assert(!BytesAdded && "code size not handled");
436  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
437  int BOpc = !AFI->isThumbFunction()
438  ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
439  int BccOpc = !AFI->isThumbFunction()
440  ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
441  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
442 
443  // Shouldn't be a fall through.
444  assert(TBB && "insertBranch must not be told to insert a fallthrough");
445  assert((Cond.size() == 2 || Cond.size() == 0) &&
446  "ARM branch conditions have two components!");
447 
448  // For conditional branches, we use addOperand to preserve CPSR flags.
449 
450  if (!FBB) {
451  if (Cond.empty()) { // Unconditional branch?
452  if (isThumb)
453  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
454  else
455  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
456  } else
457  BuildMI(&MBB, DL, get(BccOpc))
458  .addMBB(TBB)
459  .addImm(Cond[0].getImm())
460  .add(Cond[1]);
461  return 1;
462  }
463 
464  // Two-way conditional branch.
465  BuildMI(&MBB, DL, get(BccOpc))
466  .addMBB(TBB)
467  .addImm(Cond[0].getImm())
468  .add(Cond[1]);
469  if (isThumb)
470  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
471  else
472  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
473  return 2;
474 }
475 
476 bool ARMBaseInstrInfo::
477 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
478  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
479  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
480  return false;
481 }
482 
483 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
484  if (MI.isBundle()) {
485    MachineBasicBlock::const_instr_iterator I = MI.getIterator();
486    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
487    while (++I != E && I->isInsideBundle()) {
488  int PIdx = I->findFirstPredOperandIdx();
489  if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
490  return true;
491  }
492  return false;
493  }
494 
495  int PIdx = MI.findFirstPredOperandIdx();
496  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
497 }
498 
499 bool ARMBaseInstrInfo::PredicateInstruction(
500     MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
501  unsigned Opc = MI.getOpcode();
502  if (isUncondBranchOpcode(Opc)) {
503  MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
504  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
505    .addImm(Pred[0].getImm())
506  .addReg(Pred[1].getReg());
507  return true;
508  }
509 
510  int PIdx = MI.findFirstPredOperandIdx();
511  if (PIdx != -1) {
512  MachineOperand &PMO = MI.getOperand(PIdx);
513  PMO.setImm(Pred[0].getImm());
514  MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
515  return true;
516  }
517  return false;
518 }
519 
520 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
521                                          ArrayRef<MachineOperand> Pred2) const {
522  if (Pred1.size() > 2 || Pred2.size() > 2)
523  return false;
524 
525  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
526  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
527  if (CC1 == CC2)
528  return true;
529 
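  // Pred1 subsumes Pred2 when Pred1 holds whenever Pred2 holds
  // (for example, HS holds whenever HI does).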
530  switch (CC1) {
531  default:
532  return false;
533  case ARMCC::AL:
534  return true;
535  case ARMCC::HS:
536  return CC2 == ARMCC::HI;
537  case ARMCC::LS:
538  return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
539  case ARMCC::GE:
540  return CC2 == ARMCC::GT;
541  case ARMCC::LE:
542  return CC2 == ARMCC::LT;
543  }
544 }
545 
546 bool ARMBaseInstrInfo::DefinesPredicate(
547     MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
548  bool Found = false;
549  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
550  const MachineOperand &MO = MI.getOperand(i);
551  if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
552  (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
553  Pred.push_back(MO);
554  Found = true;
555  }
556  }
557 
558  return Found;
559 }
560 
561 bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
562  for (const auto &MO : MI.operands())
563  if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
564  return true;
565  return false;
566 }
567 
568 bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
569                                         unsigned Op) const {
570  const MachineOperand &Offset = MI.getOperand(Op + 1);
571  return Offset.getReg() != 0;
572 }
573 
574 // A load with a negative register offset requires an additional cycle and the
575 // +I unit on Cortex-A57.
576 bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
577                                              unsigned Op) const {
578  const MachineOperand &Offset = MI.getOperand(Op + 1);
579  const MachineOperand &Opc = MI.getOperand(Op + 2);
580  assert(Opc.isImm());
581  assert(Offset.isReg());
582  int64_t OpcImm = Opc.getImm();
583 
584  bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
585  return (isSub && Offset.getReg() != 0);
586 }
587 
588 bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
589                                        unsigned Op) const {
590  const MachineOperand &Opc = MI.getOperand(Op + 2);
591  unsigned OffImm = Opc.getImm();
592  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
593 }
594 
595 // Load, scaled register offset, not plus LSL2
596 bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
597                                                   unsigned Op) const {
598  const MachineOperand &Opc = MI.getOperand(Op + 2);
599  unsigned OffImm = Opc.getImm();
600 
601  bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
602  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
603  ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
604  if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
605  bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
606  return !SimpleScaled;
607 }
608 
609 // Minus reg for ldstso addr mode
610 bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
611                                         unsigned Op) const {
612  unsigned OffImm = MI.getOperand(Op + 2).getImm();
613  return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
614 }
615 
616 // Load, scaled register offset
617 bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
618                                       unsigned Op) const {
619  unsigned OffImm = MI.getOperand(Op + 2).getImm();
620  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
621 }
622 
623 static bool isEligibleForITBlock(const MachineInstr *MI) {
624  switch (MI->getOpcode()) {
625  default: return true;
626  case ARM::tADC: // ADC (register) T1
627  case ARM::tADDi3: // ADD (immediate) T1
628  case ARM::tADDi8: // ADD (immediate) T2
629  case ARM::tADDrr: // ADD (register) T1
630  case ARM::tAND: // AND (register) T1
631  case ARM::tASRri: // ASR (immediate) T1
632  case ARM::tASRrr: // ASR (register) T1
633  case ARM::tBIC: // BIC (register) T1
634  case ARM::tEOR: // EOR (register) T1
635  case ARM::tLSLri: // LSL (immediate) T1
636  case ARM::tLSLrr: // LSL (register) T1
637  case ARM::tLSRri: // LSR (immediate) T1
638  case ARM::tLSRrr: // LSR (register) T1
639  case ARM::tMUL: // MUL T1
640  case ARM::tMVN: // MVN (register) T1
641  case ARM::tORR: // ORR (register) T1
642  case ARM::tROR: // ROR (register) T1
643  case ARM::tRSB: // RSB (immediate) T1
644  case ARM::tSBC: // SBC (register) T1
645  case ARM::tSUBi3: // SUB (immediate) T1
646  case ARM::tSUBi8: // SUB (immediate) T2
647  case ARM::tSUBrr: // SUB (register) T1
648  return !ARMBaseInstrInfo::isCPSRDefined(*MI);
649  }
650 }
651 
652 /// isPredicable - Return true if the specified instruction can be predicated.
653 /// By default, this returns true for every instruction with a
654 /// PredicateOperand.
655 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
656  if (!MI.isPredicable())
657  return false;
658 
659  if (MI.isBundle())
660  return false;
661 
662  if (!isEligibleForITBlock(&MI))
663  return false;
664 
665  const ARMFunctionInfo *AFI =
666      MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
667 
668  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
669  // In their ARM encoding, they can't be encoded in a conditional form.
670  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
671    return false;
672 
673  if (AFI->isThumb2Function()) {
674  if (getSubtarget().restrictIT())
675  return isV8EligibleForIT(&MI);
676  }
677 
678  return true;
679 }
680 
681 namespace llvm {
682 
683 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
684  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
685  const MachineOperand &MO = MI->getOperand(i);
686  if (!MO.isReg() || MO.isUndef() || MO.isUse())
687  continue;
688  if (MO.getReg() != ARM::CPSR)
689  continue;
690  if (!MO.isDead())
691  return false;
692  }
693  // all definitions of CPSR are dead
694  return true;
695 }
696 
697 } // end namespace llvm
698 
699 /// GetInstSize - Return the size of the specified MachineInstr.
700 ///
701 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
702  const MachineBasicBlock &MBB = *MI.getParent();
703  const MachineFunction *MF = MBB.getParent();
704  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
705 
706  const MCInstrDesc &MCID = MI.getDesc();
707  if (MCID.getSize())
708  return MCID.getSize();
709 
710  // If this machine instr is an inline asm, measure it.
711  if (MI.getOpcode() == ARM::INLINEASM)
712  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
713  unsigned Opc = MI.getOpcode();
714  switch (Opc) {
715  default:
716  // pseudo-instruction sizes are zero.
717  return 0;
718  case TargetOpcode::BUNDLE:
719  return getInstBundleLength(MI);
720  case ARM::MOVi16_ga_pcrel:
721  case ARM::MOVTi16_ga_pcrel:
722  case ARM::t2MOVi16_ga_pcrel:
723  case ARM::t2MOVTi16_ga_pcrel:
724  return 4;
725  case ARM::MOVi32imm:
726  case ARM::t2MOVi32imm:
727  return 8;
728  case ARM::CONSTPOOL_ENTRY:
729  case ARM::JUMPTABLE_INSTS:
730  case ARM::JUMPTABLE_ADDRS:
731  case ARM::JUMPTABLE_TBB:
732  case ARM::JUMPTABLE_TBH:
733  // If this machine instr is a constant pool entry, its size is recorded as
734  // operand #2.
735  return MI.getOperand(2).getImm();
736  case ARM::Int_eh_sjlj_longjmp:
737  return 16;
738  case ARM::tInt_eh_sjlj_longjmp:
739  return 10;
740  case ARM::tInt_WIN_eh_sjlj_longjmp:
741  return 12;
742  case ARM::Int_eh_sjlj_setjmp:
743  case ARM::Int_eh_sjlj_setjmp_nofp:
744  return 20;
745  case ARM::tInt_eh_sjlj_setjmp:
746  case ARM::t2Int_eh_sjlj_setjmp:
747  case ARM::t2Int_eh_sjlj_setjmp_nofp:
748  return 12;
749  case ARM::SPACE:
750  return MI.getOperand(1).getImm();
751  }
752 }
753 
754 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
755  unsigned Size = 0;
756  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
757  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
758  while (++I != E && I->isInsideBundle()) {
759  assert(!I->isBundle() && "No nested bundle!");
760  Size += getInstSizeInBytes(*I);
761  }
762  return Size;
763 }
764 
765 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
766                                     MachineBasicBlock::iterator I,
767                                     unsigned DestReg, bool KillSrc,
768  const ARMSubtarget &Subtarget) const {
769  unsigned Opc = Subtarget.isThumb()
770  ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
771  : ARM::MRS;
772 
773  MachineInstrBuilder MIB =
774  BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
775 
776  // There is only 1 A/R class MRS instruction, and it always refers to
777  // APSR. However, there are lots of other possibilities on M-class cores.
778  if (Subtarget.isMClass())
779  MIB.addImm(0x800);
780 
781  MIB.add(predOps(ARMCC::AL))
782  .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
783 }
784 
785 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
786                                   MachineBasicBlock::iterator I,
787                                   unsigned SrcReg, bool KillSrc,
788  const ARMSubtarget &Subtarget) const {
789  unsigned Opc = Subtarget.isThumb()
790  ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
791  : ARM::MSR;
792 
793  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
794 
795  if (Subtarget.isMClass())
796  MIB.addImm(0x800);
797  else
798  MIB.addImm(8);
799 
800  MIB.addReg(SrcReg, getKillRegState(KillSrc))
801     .add(predOps(ARMCC::AL))
802     .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
803 }
804 
805 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
806                                    MachineBasicBlock::iterator I,
807                                    const DebugLoc &DL, unsigned DestReg,
808  unsigned SrcReg, bool KillSrc) const {
809  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
810  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
811 
812  if (GPRDest && GPRSrc) {
813  BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
814  .addReg(SrcReg, getKillRegState(KillSrc))
815    .add(predOps(ARMCC::AL))
816    .add(condCodeOp());
817  return;
818  }
819 
820  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
821  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
822 
823  unsigned Opc = 0;
824  if (SPRDest && SPRSrc)
825  Opc = ARM::VMOVS;
826  else if (GPRDest && SPRSrc)
827  Opc = ARM::VMOVRS;
828  else if (SPRDest && GPRSrc)
829  Opc = ARM::VMOVSR;
830  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
831  Opc = ARM::VMOVD;
832  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
833  Opc = ARM::VORRq;
834 
835  if (Opc) {
836  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
837  MIB.addReg(SrcReg, getKillRegState(KillSrc));
838  if (Opc == ARM::VORRq)
839  MIB.addReg(SrcReg, getKillRegState(KillSrc));
840  MIB.add(predOps(ARMCC::AL));
841  return;
842  }
843 
844  // Handle register classes that require multiple instructions.
845  unsigned BeginIdx = 0;
846  unsigned SubRegs = 0;
847  int Spacing = 1;
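  // BeginIdx is the first sub-register index to copy, SubRegs the number of
  // sub-registers, and Spacing the stride between them: a spacing of 2 walks
  // every other D register, as used by the spaced register classes below.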
848 
849  // Use VORRq when possible.
850  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
851  Opc = ARM::VORRq;
852  BeginIdx = ARM::qsub_0;
853  SubRegs = 2;
854  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
855  Opc = ARM::VORRq;
856  BeginIdx = ARM::qsub_0;
857  SubRegs = 4;
858  // Fall back to VMOVD.
859  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
860  Opc = ARM::VMOVD;
861  BeginIdx = ARM::dsub_0;
862  SubRegs = 2;
863  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
864  Opc = ARM::VMOVD;
865  BeginIdx = ARM::dsub_0;
866  SubRegs = 3;
867  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
868  Opc = ARM::VMOVD;
869  BeginIdx = ARM::dsub_0;
870  SubRegs = 4;
871  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
872  Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
873  BeginIdx = ARM::gsub_0;
874  SubRegs = 2;
875  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
876  Opc = ARM::VMOVD;
877  BeginIdx = ARM::dsub_0;
878  SubRegs = 2;
879  Spacing = 2;
880  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
881  Opc = ARM::VMOVD;
882  BeginIdx = ARM::dsub_0;
883  SubRegs = 3;
884  Spacing = 2;
885  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
886  Opc = ARM::VMOVD;
887  BeginIdx = ARM::dsub_0;
888  SubRegs = 4;
889  Spacing = 2;
890  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
891  Opc = ARM::VMOVS;
892  BeginIdx = ARM::ssub_0;
893  SubRegs = 2;
894  } else if (SrcReg == ARM::CPSR) {
895  copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
896  return;
897  } else if (DestReg == ARM::CPSR) {
898  copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
899  return;
900  }
901 
902  assert(Opc && "Impossible reg-to-reg copy");
903 
904  const TargetRegisterInfo *TRI = &getRegisterInfo();
905  MachineInstrBuilder Mov;
906 
907  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
908  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
909  BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
910  Spacing = -Spacing;
911  }
912 #ifndef NDEBUG
913  SmallSet<unsigned, 4> DstRegs;
914 #endif
915  for (unsigned i = 0; i != SubRegs; ++i) {
916  unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
917  unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
918  assert(Dst && Src && "Bad sub-register");
919 #ifndef NDEBUG
920  assert(!DstRegs.count(Src) && "destructive vector copy");
921  DstRegs.insert(Dst);
922 #endif
923  Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
924  // VORR takes two source operands.
925  if (Opc == ARM::VORRq)
926  Mov.addReg(Src);
927  Mov = Mov.add(predOps(ARMCC::AL));
928  // MOVr can set CC.
929  if (Opc == ARM::MOVr)
930  Mov = Mov.add(condCodeOp());
931  }
932  // Add implicit super-register defs and kills to the last instruction.
933  Mov->addRegisterDefined(DestReg, TRI);
934  if (KillSrc)
935  Mov->addRegisterKilled(SrcReg, TRI);
936 }
937 
937 
938 bool ARMBaseInstrInfo::isCopyInstr(const MachineInstr &MI, MachineOperand &Src,
939                                    MachineOperand &Dest) const {
940  // VMOVRRD is also a copy instruction, but it requires
941  // special handling: it is a more complex form of copy, so
942  // we do not consider it here. To recognize such instructions,
943  // the isExtractSubregLike MI interface function
944  // could be used.
945  // VORRq is considered a move only if its two
946  // inputs are the same register.
947  if (!MI.isMoveReg() ||
948  (MI.getOpcode() == ARM::VORRq &&
949  MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
950  return false;
951  Dest = MI.getOperand(0);
952  Src = MI.getOperand(1);
953  return true;
954 }
955 
956 const MachineInstrBuilder &
957 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
958                           unsigned SubIdx, unsigned State,
959  const TargetRegisterInfo *TRI) const {
960  if (!SubIdx)
961  return MIB.addReg(Reg, State);
962 
963  if (TargetRegisterInfo::isPhysicalRegister(Reg))
964    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
965  return MIB.addReg(Reg, State, SubIdx);
966 }
967 
968 void ARMBaseInstrInfo::
969 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
970                     unsigned SrcReg, bool isKill, int FI,
971  const TargetRegisterClass *RC,
972  const TargetRegisterInfo *TRI) const {
973  DebugLoc DL;
974  if (I != MBB.end()) DL = I->getDebugLoc();
975  MachineFunction &MF = *MBB.getParent();
976  MachineFrameInfo &MFI = MF.getFrameInfo();
977  unsigned Align = MFI.getObjectAlignment(FI);
978 
979  MachineMemOperand *MMO = MF.getMachineMemOperand(
980      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
981      MFI.getObjectSize(FI), Align);
982 
983  switch (TRI->getSpillSize(*RC)) {
984  case 2:
985  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
986  BuildMI(MBB, I, DL, get(ARM::VSTRH))
987  .addReg(SrcReg, getKillRegState(isKill))
988  .addFrameIndex(FI)
989  .addImm(0)
990  .addMemOperand(MMO)
991  .add(predOps(ARMCC::AL));
992  } else
993  llvm_unreachable("Unknown reg class!");
994  break;
995  case 4:
996  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
997  BuildMI(MBB, I, DL, get(ARM::STRi12))
998  .addReg(SrcReg, getKillRegState(isKill))
999  .addFrameIndex(FI)
1000  .addImm(0)
1001  .addMemOperand(MMO)
1002  .add(predOps(ARMCC::AL));
1003  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1004  BuildMI(MBB, I, DL, get(ARM::VSTRS))
1005  .addReg(SrcReg, getKillRegState(isKill))
1006  .addFrameIndex(FI)
1007  .addImm(0)
1008  .addMemOperand(MMO)
1009  .add(predOps(ARMCC::AL));
1010  } else
1011  llvm_unreachable("Unknown reg class!");
1012  break;
1013  case 8:
1014  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1015  BuildMI(MBB, I, DL, get(ARM::VSTRD))
1016  .addReg(SrcReg, getKillRegState(isKill))
1017  .addFrameIndex(FI)
1018  .addImm(0)
1019  .addMemOperand(MMO)
1020  .add(predOps(ARMCC::AL));
1021  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1022  if (Subtarget.hasV5TEOps()) {
1023  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
1024  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1025  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1026  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1027  .add(predOps(ARMCC::AL));
1028  } else {
1029  // Fallback to STM instruction, which has existed since the dawn of
1030  // time.
1031  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
1032  .addFrameIndex(FI)
1033  .addMemOperand(MMO)
1034  .add(predOps(ARMCC::AL));
1035  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1036  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1037  }
1038  } else
1039  llvm_unreachable("Unknown reg class!");
1040  break;
1041  case 16:
1042  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1043  // Use aligned spills if the stack can be realigned.
1044  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1045  BuildMI(MBB, I, DL, get(ARM::VST1q64))
1046  .addFrameIndex(FI)
1047  .addImm(16)
1048  .addReg(SrcReg, getKillRegState(isKill))
1049  .addMemOperand(MMO)
1050  .add(predOps(ARMCC::AL));
1051  } else {
1052  BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
1053  .addReg(SrcReg, getKillRegState(isKill))
1054  .addFrameIndex(FI)
1055  .addMemOperand(MMO)
1056  .add(predOps(ARMCC::AL));
1057  }
1058  } else
1059  llvm_unreachable("Unknown reg class!");
1060  break;
1061  case 24:
1062  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1063  // Use aligned spills if the stack can be realigned.
1064  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1065  BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
1066  .addFrameIndex(FI)
1067  .addImm(16)
1068  .addReg(SrcReg, getKillRegState(isKill))
1069  .addMemOperand(MMO)
1070  .add(predOps(ARMCC::AL));
1071  } else {
1072  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1073  .addFrameIndex(FI)
1074  .add(predOps(ARMCC::AL))
1075  .addMemOperand(MMO);
1076  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1077  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1078  AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1079  }
1080  } else
1081  llvm_unreachable("Unknown reg class!");
1082  break;
1083  case 32:
1084  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1085  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1086  // FIXME: It's possible to only store part of the QQ register if the
1087  // spilled def has a sub-register index.
1088  BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
1089  .addFrameIndex(FI)
1090  .addImm(16)
1091  .addReg(SrcReg, getKillRegState(isKill))
1092  .addMemOperand(MMO)
1093  .add(predOps(ARMCC::AL));
1094  } else {
1095  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1096  .addFrameIndex(FI)
1097  .add(predOps(ARMCC::AL))
1098  .addMemOperand(MMO);
1099  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1100  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1101  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1102  AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1103  }
1104  } else
1105  llvm_unreachable("Unknown reg class!");
1106  break;
1107  case 64:
1108  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1109  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1110  .addFrameIndex(FI)
1111  .add(predOps(ARMCC::AL))
1112  .addMemOperand(MMO);
1113  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1114  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1115  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1116  MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1117  MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1118  MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1119  MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1120  AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1121  } else
1122  llvm_unreachable("Unknown reg class!");
1123  break;
1124  default:
1125  llvm_unreachable("Unknown reg class!");
1126  }
1127 }
1128 
1129 unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1130                                               int &FrameIndex) const {
1131  switch (MI.getOpcode()) {
1132  default: break;
1133  case ARM::STRrs:
1134  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1135  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1136  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1137  MI.getOperand(3).getImm() == 0) {
1138  FrameIndex = MI.getOperand(1).getIndex();
1139  return MI.getOperand(0).getReg();
1140  }
1141  break;
1142  case ARM::STRi12:
1143  case ARM::t2STRi12:
1144  case ARM::tSTRspi:
1145  case ARM::VSTRD:
1146  case ARM::VSTRS:
1147  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1148  MI.getOperand(2).getImm() == 0) {
1149  FrameIndex = MI.getOperand(1).getIndex();
1150  return MI.getOperand(0).getReg();
1151  }
1152  break;
1153  case ARM::VST1q64:
1154  case ARM::VST1d64TPseudo:
1155  case ARM::VST1d64QPseudo:
1156  if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1157  FrameIndex = MI.getOperand(0).getIndex();
1158  return MI.getOperand(2).getReg();
1159  }
1160  break;
1161  case ARM::VSTMQIA:
1162  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1163  FrameIndex = MI.getOperand(1).getIndex();
1164  return MI.getOperand(0).getReg();
1165  }
1166  break;
1167  }
1168 
1169  return 0;
1170 }
1171 
1172 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1173                                                     int &FrameIndex) const {
1174  const MachineMemOperand *Dummy;
1175  return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
1176 }
1177 
1178 void ARMBaseInstrInfo::
1179 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1180                      unsigned DestReg, int FI,
1181  const TargetRegisterClass *RC,
1182  const TargetRegisterInfo *TRI) const {
1183  DebugLoc DL;
1184  if (I != MBB.end()) DL = I->getDebugLoc();
1185  MachineFunction &MF = *MBB.getParent();
1186  MachineFrameInfo &MFI = MF.getFrameInfo();
1187  unsigned Align = MFI.getObjectAlignment(FI);
1188  MachineMemOperand *MMO = MF.getMachineMemOperand(
1189      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1190      MFI.getObjectSize(FI), Align);
1191 
1192  switch (TRI->getSpillSize(*RC)) {
1193  case 2:
1194  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1195  BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1196  .addFrameIndex(FI)
1197  .addImm(0)
1198  .addMemOperand(MMO)
1199  .add(predOps(ARMCC::AL));
1200  } else
1201  llvm_unreachable("Unknown reg class!");
1202  break;
1203  case 4:
1204  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1205  BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1206  .addFrameIndex(FI)
1207  .addImm(0)
1208  .addMemOperand(MMO)
1209  .add(predOps(ARMCC::AL));
1210  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1211  BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1212  .addFrameIndex(FI)
1213  .addImm(0)
1214  .addMemOperand(MMO)
1215  .add(predOps(ARMCC::AL));
1216  } else
1217  llvm_unreachable("Unknown reg class!");
1218  break;
1219  case 8:
1220  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1221  BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1222  .addFrameIndex(FI)
1223  .addImm(0)
1224  .addMemOperand(MMO)
1225  .add(predOps(ARMCC::AL));
1226  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1227  MachineInstrBuilder MIB;
1228 
1229  if (Subtarget.hasV5TEOps()) {
1230  MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1231  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1232  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1233  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1234  .add(predOps(ARMCC::AL));
1235  } else {
1236  // Fallback to LDM instruction, which has existed since the dawn of
1237  // time.
1238  MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1239  .addFrameIndex(FI)
1240  .addMemOperand(MMO)
1241  .add(predOps(ARMCC::AL));
1242  MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1243  MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1244  }
1245 
1246      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1247        MIB.addReg(DestReg, RegState::ImplicitDefine);
1248  } else
1249  llvm_unreachable("Unknown reg class!");
1250  break;
1251  case 16:
1252  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1253  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1254  BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1255  .addFrameIndex(FI)
1256  .addImm(16)
1257  .addMemOperand(MMO)
1258  .add(predOps(ARMCC::AL));
1259  } else {
1260  BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1261  .addFrameIndex(FI)
1262  .addMemOperand(MMO)
1263  .add(predOps(ARMCC::AL));
1264  }
1265  } else
1266  llvm_unreachable("Unknown reg class!");
1267  break;
1268  case 24:
1269  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1270  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1271  BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1272  .addFrameIndex(FI)
1273  .addImm(16)
1274  .addMemOperand(MMO)
1275  .add(predOps(ARMCC::AL));
1276  } else {
1277  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1278  .addFrameIndex(FI)
1279  .addMemOperand(MMO)
1280  .add(predOps(ARMCC::AL));
1281  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1282  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1283  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1284      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1285        MIB.addReg(DestReg, RegState::ImplicitDefine);
1286  }
1287  } else
1288  llvm_unreachable("Unknown reg class!");
1289  break;
1290  case 32:
1291  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1292  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1293  BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1294  .addFrameIndex(FI)
1295  .addImm(16)
1296  .addMemOperand(MMO)
1297  .add(predOps(ARMCC::AL));
1298  } else {
1299  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1300  .addFrameIndex(FI)
1301  .add(predOps(ARMCC::AL))
1302  .addMemOperand(MMO);
1303  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1304  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1305  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1306  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1307      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1308        MIB.addReg(DestReg, RegState::ImplicitDefine);
1309  }
1310  } else
1311  llvm_unreachable("Unknown reg class!");
1312  break;
1313  case 64:
1314  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1315  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1316  .addFrameIndex(FI)
1317  .add(predOps(ARMCC::AL))
1318  .addMemOperand(MMO);
1319  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1320  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1321  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1322  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1323  MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1324  MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1325  MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1326  MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1327    if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1328      MIB.addReg(DestReg, RegState::ImplicitDefine);
1329  } else
1330  llvm_unreachable("Unknown reg class!");
1331  break;
1332  default:
1333  llvm_unreachable("Unknown regclass!");
1334  }
1335 }
1336 
1337 unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1338                                                int &FrameIndex) const {
1339  switch (MI.getOpcode()) {
1340  default: break;
1341  case ARM::LDRrs:
1342  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1343  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1344  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1345  MI.getOperand(3).getImm() == 0) {
1346  FrameIndex = MI.getOperand(1).getIndex();
1347  return MI.getOperand(0).getReg();
1348  }
1349  break;
1350  case ARM::LDRi12:
1351  case ARM::t2LDRi12:
1352  case ARM::tLDRspi:
1353  case ARM::VLDRD:
1354  case ARM::VLDRS:
1355  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1356  MI.getOperand(2).getImm() == 0) {
1357  FrameIndex = MI.getOperand(1).getIndex();
1358  return MI.getOperand(0).getReg();
1359  }
1360  break;
1361  case ARM::VLD1q64:
1362  case ARM::VLD1d64TPseudo:
1363  case ARM::VLD1d64QPseudo:
1364  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1365  FrameIndex = MI.getOperand(1).getIndex();
1366  return MI.getOperand(0).getReg();
1367  }
1368  break;
1369  case ARM::VLDMQIA:
1370  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1371  FrameIndex = MI.getOperand(1).getIndex();
1372  return MI.getOperand(0).getReg();
1373  }
1374  break;
1375  }
1376 
1377  return 0;
1378 }
1379 
1380 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1381                                                      int &FrameIndex) const {
1382  const MachineMemOperand *Dummy;
1383  return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
1384 }
1385 
1386 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1387 /// depending on whether the result is used.
1388 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1389  bool isThumb1 = Subtarget.isThumb1Only();
1390  bool isThumb2 = Subtarget.isThumb2();
1391  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1392 
1393  DebugLoc dl = MI->getDebugLoc();
1394  MachineBasicBlock *BB = MI->getParent();
1395 
1396  MachineInstrBuilder LDM, STM;
1397  if (isThumb1 || !MI->getOperand(1).isDead()) {
1398  MachineOperand LDWb(MI->getOperand(1));
1399  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1400  : isThumb1 ? ARM::tLDMIA_UPD
1401  : ARM::LDMIA_UPD))
1402  .add(LDWb);
1403  } else {
1404  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1405  }
1406 
1407  if (isThumb1 || !MI->getOperand(0).isDead()) {
1408  MachineOperand STWb(MI->getOperand(0));
1409  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1410  : isThumb1 ? ARM::tSTMIA_UPD
1411  : ARM::STMIA_UPD))
1412  .add(STWb);
1413  } else {
1414  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1415  }
1416 
1417  MachineOperand LDBase(MI->getOperand(3));
1418  LDM.add(LDBase).add(predOps(ARMCC::AL));
1419 
1420  MachineOperand STBase(MI->getOperand(2));
1421  STM.add(STBase).add(predOps(ARMCC::AL));
1422 
1423  // Sort the scratch registers into ascending order.
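  // (LDM/STM register lists must appear in ascending order of register
  //  encoding, hence the sort by encoding value below.)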
1424  const TargetRegisterInfo &TRI = getRegisterInfo();
1425  SmallVector<unsigned, 6> ScratchRegs;
1426  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1427  ScratchRegs.push_back(MI->getOperand(I).getReg());
1428  llvm::sort(ScratchRegs.begin(), ScratchRegs.end(),
1429  [&TRI](const unsigned &Reg1,
1430  const unsigned &Reg2) -> bool {
1431  return TRI.getEncodingValue(Reg1) <
1432  TRI.getEncodingValue(Reg2);
1433  });
1434 
1435  for (const auto &Reg : ScratchRegs) {
1436  LDM.addReg(Reg, RegState::Define);
1437  STM.addReg(Reg, RegState::Kill);
1438  }
1439 
1440  BB->erase(MI);
1441 }
1442 
1443 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1444  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1445  assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
1446  "LOAD_STACK_GUARD currently supported only for MachO.");
1447  expandLoadStackGuard(MI);
1448  MI.getParent()->erase(MI);
1449  return true;
1450  }
1451 
1452  if (MI.getOpcode() == ARM::MEMCPY) {
1453  expandMEMCPY(MI);
1454  return true;
1455  }
1456 
1457  // This hook gets to expand COPY instructions before they become
1458  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1459  // widened to VMOVD. We prefer the VMOVD when possible because it may be
1460  // changed into a VORR that can go down the NEON pipeline.
1461  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
1462  return false;
1463 
1464  // Look for a copy between even S-registers. That is where we keep floats
1465  // when using NEON v2f32 instructions for f32 arithmetic.
1466  unsigned DstRegS = MI.getOperand(0).getReg();
1467  unsigned SrcRegS = MI.getOperand(1).getReg();
1468  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1469  return false;
1470 
1471  const TargetRegisterInfo *TRI = &getRegisterInfo();
1472  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1473  &ARM::DPRRegClass);
1474  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1475  &ARM::DPRRegClass);
1476  if (!DstRegD || !SrcRegD)
1477  return false;
1478 
1479  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1480  // legal if the COPY already defines the full DstRegD, and it isn't a
1481  // sub-register insertion.
1482  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1483  return false;
1484 
1485  // A dead copy shouldn't show up here, but reject it just in case.
1486  if (MI.getOperand(0).isDead())
1487  return false;
1488 
1489  // All clear, widen the COPY.
1490  LLVM_DEBUG(dbgs() << "widening: " << MI);
1491  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1492 
1493  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1494  // or some other super-register.
1495  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1496  if (ImpDefIdx != -1)
1497  MI.RemoveOperand(ImpDefIdx);
1498 
1499  // Change the opcode and operands.
1500  MI.setDesc(get(ARM::VMOVD));
1501  MI.getOperand(0).setReg(DstRegD);
1502  MI.getOperand(1).setReg(SrcRegD);
1503  MIB.add(predOps(ARMCC::AL));
1504 
1505  // We are now reading SrcRegD instead of SrcRegS. This may upset the
1506  // register scavenger and machine verifier, so we need to indicate that we
1507  // are reading an undefined value from SrcRegD, but a proper value from
1508  // SrcRegS.
1509  MI.getOperand(1).setIsUndef();
1510  MIB.addReg(SrcRegS, RegState::Implicit);
1511 
1512  // SrcRegD may actually contain an unrelated value in the ssub_1
1513  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1514  if (MI.getOperand(1).isKill()) {
1515  MI.getOperand(1).setIsKill(false);
1516  MI.addRegisterKilled(SrcRegS, TRI, true);
1517  }
1518 
1519  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1520  return true;
1521 }
1522 
1523 /// Create a copy of a const pool value. Update CPI to the new index and return
1524 /// the label UID.
1525 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1526  MachineConstantPool *MCP = MF.getConstantPool();
1527  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1528 
1529  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1530  assert(MCPE.isMachineConstantPoolEntry() &&
1531  "Expecting a machine constantpool entry!");
1532  ARMConstantPoolValue *ACPV =
1533  static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1534 
1535  unsigned PCLabelId = AFI->createPICLabelUId();
1536  ARMConstantPoolValue *NewCPV = nullptr;
1537 
1538  // FIXME: The below assumes PIC relocation model and that the function
1539  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1540 // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
1541  // instructions, so that's probably OK, but is PIC always correct when
1542  // we get here?
1543  if (ACPV->isGlobalValue())
1544    NewCPV = ARMConstantPoolConstant::Create(
1545        cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1546  4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1547  else if (ACPV->isExtSymbol())
1548  NewCPV = ARMConstantPoolSymbol::
1549  Create(MF.getFunction().getContext(),
1550  cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1551  else if (ACPV->isBlockAddress())
1552  NewCPV = ARMConstantPoolConstant::
1553  Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1554           ARMCP::CPBlockAddress, 4);
1555  else if (ACPV->isLSDA())
1556  NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1557  ARMCP::CPLSDA, 4);
1558  else if (ACPV->isMachineBasicBlock())
1559  NewCPV = ARMConstantPoolMBB::
1560  Create(MF.getFunction().getContext(),
1561  cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1562  else
1563  llvm_unreachable("Unexpected ARM constantpool value type!!");
1564  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
1565  return PCLabelId;
1566 }
1567 
1568 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1569                                      MachineBasicBlock::iterator I,
1570                                      unsigned DestReg, unsigned SubIdx,
1571  const MachineInstr &Orig,
1572  const TargetRegisterInfo &TRI) const {
1573  unsigned Opcode = Orig.getOpcode();
1574  switch (Opcode) {
1575  default: {
1576  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1577  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1578  MBB.insert(I, MI);
1579  break;
1580  }
1581  case ARM::tLDRpci_pic:
1582  case ARM::t2LDRpci_pic: {
1583  MachineFunction &MF = *MBB.getParent();
1584  unsigned CPI = Orig.getOperand(1).getIndex();
1585  unsigned PCLabelId = duplicateCPV(MF, CPI);
1586  MachineInstrBuilder MIB =
1587  BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1588  .addConstantPoolIndex(CPI)
1589  .addImm(PCLabelId);
1590  MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
1591  break;
1592  }
1593  }
1594 }
1595 
1596 MachineInstr &
1597 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1598                             MachineBasicBlock::iterator InsertBefore,
1599  const MachineInstr &Orig) const {
1600  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1601  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1602  for (;;) {
1603  switch (I->getOpcode()) {
1604  case ARM::tLDRpci_pic:
1605  case ARM::t2LDRpci_pic: {
1606  MachineFunction &MF = *MBB.getParent();
1607  unsigned CPI = I->getOperand(1).getIndex();
1608  unsigned PCLabelId = duplicateCPV(MF, CPI);
1609  I->getOperand(1).setIndex(CPI);
1610  I->getOperand(2).setImm(PCLabelId);
1611  break;
1612  }
1613  }
1614  if (!I->isBundledWithSucc())
1615  break;
1616  ++I;
1617  }
1618  return Cloned;
1619 }
1620 
1621 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1622                                         const MachineInstr &MI1,
1623  const MachineRegisterInfo *MRI) const {
1624  unsigned Opcode = MI0.getOpcode();
1625  if (Opcode == ARM::t2LDRpci ||
1626  Opcode == ARM::t2LDRpci_pic ||
1627  Opcode == ARM::tLDRpci ||
1628  Opcode == ARM::tLDRpci_pic ||
1629  Opcode == ARM::LDRLIT_ga_pcrel ||
1630  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1631  Opcode == ARM::tLDRLIT_ga_pcrel ||
1632  Opcode == ARM::MOV_ga_pcrel ||
1633  Opcode == ARM::MOV_ga_pcrel_ldr ||
1634  Opcode == ARM::t2MOV_ga_pcrel) {
1635  if (MI1.getOpcode() != Opcode)
1636  return false;
1637  if (MI0.getNumOperands() != MI1.getNumOperands())
1638  return false;
1639 
1640  const MachineOperand &MO0 = MI0.getOperand(1);
1641  const MachineOperand &MO1 = MI1.getOperand(1);
1642  if (MO0.getOffset() != MO1.getOffset())
1643  return false;
1644 
1645  if (Opcode == ARM::LDRLIT_ga_pcrel ||
1646  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1647  Opcode == ARM::tLDRLIT_ga_pcrel ||
1648  Opcode == ARM::MOV_ga_pcrel ||
1649  Opcode == ARM::MOV_ga_pcrel_ldr ||
1650  Opcode == ARM::t2MOV_ga_pcrel)
1651  // Ignore the PC labels.
1652  return MO0.getGlobal() == MO1.getGlobal();
1653 
1654  const MachineFunction *MF = MI0.getParent()->getParent();
1655  const MachineConstantPool *MCP = MF->getConstantPool();
1656  int CPI0 = MO0.getIndex();
1657  int CPI1 = MO1.getIndex();
1658  const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1659  const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1660  bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1661  bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1662  if (isARMCP0 && isARMCP1) {
1663  ARMConstantPoolValue *ACPV0 =
1664  static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1665  ARMConstantPoolValue *ACPV1 =
1666  static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1667  return ACPV0->hasSameValue(ACPV1);
1668  } else if (!isARMCP0 && !isARMCP1) {
1669  return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1670  }
1671  return false;
1672  } else if (Opcode == ARM::PICLDR) {
1673  if (MI1.getOpcode() != Opcode)
1674  return false;
1675  if (MI0.getNumOperands() != MI1.getNumOperands())
1676  return false;
1677 
1678  unsigned Addr0 = MI0.getOperand(1).getReg();
1679  unsigned Addr1 = MI1.getOperand(1).getReg();
1680  if (Addr0 != Addr1) {
1681  if (!MRI ||
1682       !TargetRegisterInfo::isVirtualRegister(Addr0) ||
1683       !TargetRegisterInfo::isVirtualRegister(Addr1))
1684      return false;
1685 
1686  // This assumes SSA form.
1687  MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1688  MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1689  // Check if the loaded values, e.g. a constantpool or a global address, are
1690  // the same.
1691  if (!produceSameValue(*Def0, *Def1, MRI))
1692  return false;
1693  }
1694 
1695  for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1696  // %12 = PICLDR %11, 0, 14, %noreg
1697  const MachineOperand &MO0 = MI0.getOperand(i);
1698  const MachineOperand &MO1 = MI1.getOperand(i);
1699  if (!MO0.isIdenticalTo(MO1))
1700  return false;
1701  }
1702  return true;
1703  }
1704 
1705  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1706 }
1707 
1708 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1709 /// determine if two loads are loading from the same base address. It should
1710 /// only return true if the base pointers are the same and the only difference
1711 /// between the two addresses is the offset. It also returns the offsets by
1712 /// reference.
1713 ///
1714 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1715 /// is permanently disabled.
1716 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1717                                                int64_t &Offset1,
1718  int64_t &Offset2) const {
1719  // Don't worry about Thumb: just ARM and Thumb2.
1720  if (Subtarget.isThumb1Only()) return false;
1721 
1722  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1723  return false;
1724 
1725  switch (Load1->getMachineOpcode()) {
1726  default:
1727  return false;
1728  case ARM::LDRi12:
1729  case ARM::LDRBi12:
1730  case ARM::LDRD:
1731  case ARM::LDRH:
1732  case ARM::LDRSB:
1733  case ARM::LDRSH:
1734  case ARM::VLDRD:
1735  case ARM::VLDRS:
1736  case ARM::t2LDRi8:
1737  case ARM::t2LDRBi8:
1738  case ARM::t2LDRDi8:
1739  case ARM::t2LDRSHi8:
1740  case ARM::t2LDRi12:
1741  case ARM::t2LDRBi12:
1742  case ARM::t2LDRSHi12:
1743  break;
1744  }
1745 
1746  switch (Load2->getMachineOpcode()) {
1747  default:
1748  return false;
1749  case ARM::LDRi12:
1750  case ARM::LDRBi12:
1751  case ARM::LDRD:
1752  case ARM::LDRH:
1753  case ARM::LDRSB:
1754  case ARM::LDRSH:
1755  case ARM::VLDRD:
1756  case ARM::VLDRS:
1757  case ARM::t2LDRi8:
1758  case ARM::t2LDRBi8:
1759  case ARM::t2LDRSHi8:
1760  case ARM::t2LDRi12:
1761  case ARM::t2LDRBi12:
1762  case ARM::t2LDRSHi12:
1763  break;
1764  }
1765 
1766  // Check if base addresses and chain operands match.
1767  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1768  Load1->getOperand(4) != Load2->getOperand(4))
1769  return false;
1770 
1771  // Index should be Reg0.
1772  if (Load1->getOperand(3) != Load2->getOperand(3))
1773  return false;
1774 
1775  // Determine the offsets.
1776  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1777  isa<ConstantSDNode>(Load2->getOperand(1))) {
1778  Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1779  Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1780  return true;
1781  }
1782 
1783  return false;
1784 }
1785 
1786 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1787 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1788 /// be scheduled together. On some targets, if two loads are loading from
1789 /// addresses in the same cache line, it's better if they are scheduled
1790 /// together. This function takes two integers that represent the load offsets
1791 /// from the common base address. It returns true if it decides it's desirable
1792 /// to schedule the two loads together. "NumLoads" is the number of loads that
1793 /// have already been scheduled after Load1.
1794 ///
1795 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1796 /// is permanently disabled.
1797 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1798  int64_t Offset1, int64_t Offset2,
1799  unsigned NumLoads) const {
1800  // Don't worry about Thumb: just ARM and Thumb2.
1801  if (Subtarget.isThumb1Only()) return false;
1802 
1803  assert(Offset2 > Offset1);
1804 
1805  if ((Offset2 - Offset1) / 8 > 64)
1806  return false;
1807 
1808  // Check if the machine opcodes are different. If they are different
1809  // then we consider the loads not to be from the same base address,
1810  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the
1811  // other LDRBi12. In this case they are considered the same because they
1812  // are merely different encoding forms of the same basic instruction.
1813  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1814  !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1815  Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1816  (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1817  Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1818  return false; // FIXME: overly conservative?
1819 
1820  // Four loads in a row should be sufficient.
1821  if (NumLoads >= 3)
1822  return false;
1823 
1824  return true;
1825 }
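// Worked example (illustrative, assumed values, not part of the original
// source): for two LDRi12 loads with Offset1 = 0 and Offset2 = 8 and
// NumLoads = 1, every check above passes ((8 - 0) / 8 is not > 64, the
// machine opcodes match, and fewer than three loads are already scheduled),
// so the hook returns true and the pre-RA scheduler keeps the loads together.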
1826 
1827 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1828  const MachineBasicBlock *MBB,
1829  const MachineFunction &MF) const {
1830  // Debug info is never a scheduling boundary. It's necessary to be explicit
1831  // due to the special treatment of IT instructions below, otherwise a
1832  // dbg_value followed by an IT will result in the IT instruction being
1833  // considered a scheduling hazard, which is wrong. It should be the actual
1834  // instruction preceding the dbg_value instruction(s), just like it is
1835  // when debug info is not present.
1836  if (MI.isDebugInstr())
1837  return false;
1838 
1839  // Terminators and labels can't be scheduled around.
1840  if (MI.isTerminator() || MI.isPosition())
1841  return true;
1842 
1843  // Treat the start of the IT block as a scheduling boundary, but schedule
1844  // t2IT along with all instructions following it.
1845  // FIXME: This is a big hammer. But the alternative is to add all potential
1846  // true and anti dependencies to IT block instructions as implicit operands
1847  // to the t2IT instruction. The added compile time and complexity does not
1848  // seem worth it.
1849  MachineBasicBlock::const_iterator I = MI;
1850  // Make sure to skip any debug instructions
1851  while (++I != MBB->end() && I->isDebugInstr())
1852  ;
1853  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1854  return true;
1855 
1856  // Don't attempt to schedule around any instruction that defines
1857  // a stack-oriented pointer, as it's unlikely to be profitable. This
1858  // saves compile time, because it doesn't require every single
1859  // stack slot reference to depend on the instruction that does the
1860  // modification.
1861  // Calls don't actually change the stack pointer, even if they have imp-defs.
1862  // No ARM calling conventions change the stack pointer. (X86 calling
1863  // conventions sometimes do).
1864  if (!MI.isCall() && MI.definesRegister(ARM::SP))
1865  return true;
1866 
1867  return false;
1868 }
1869 
1870 bool ARMBaseInstrInfo::
1871 isProfitableToIfCvt(MachineBasicBlock &MBB,
1872  unsigned NumCycles, unsigned ExtraPredCycles,
1873  BranchProbability Probability) const {
1874  if (!NumCycles)
1875  return false;
1876 
1877  // If we are optimizing for size, see if the branch in the predecessor can be
1878  // lowered to cbn?z by the constant island lowering pass, and return false if
1879  // so. This results in a shorter instruction sequence.
1880  if (MBB.getParent()->getFunction().optForSize()) {
1881  MachineBasicBlock *Pred = *MBB.pred_begin();
1882  if (!Pred->empty()) {
1883  MachineInstr *LastMI = &*Pred->rbegin();
1884  if (LastMI->getOpcode() == ARM::t2Bcc) {
1885  MachineBasicBlock::iterator CmpMI = LastMI;
1886  if (CmpMI != Pred->begin()) {
1887  --CmpMI;
1888  if (CmpMI->getOpcode() == ARM::tCMPi8 ||
1889  CmpMI->getOpcode() == ARM::t2CMPri) {
1890  unsigned Reg = CmpMI->getOperand(0).getReg();
1891  unsigned PredReg = 0;
1892  ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
1893  if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
1894  isARMLowRegister(Reg))
1895  return false;
1896  }
1897  }
1898  }
1899  }
1900  }
1901  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1902  MBB, 0, 0, Probability);
1903 }
1904 
1905 bool ARMBaseInstrInfo::
1906 isProfitableToIfCvt(MachineBasicBlock &TBB,
1907  unsigned TCycles, unsigned TExtra,
1908  MachineBasicBlock &FBB,
1909  unsigned FCycles, unsigned FExtra,
1910  BranchProbability Probability) const {
1911  if (!TCycles)
1912  return false;
1913 
1914  // Attempt to estimate the relative costs of predication versus branching.
1915  // Here we scale up each component of UnpredCost to avoid precision issues when
1916  // scaling TCycles/FCycles by Probability.
1917  const unsigned ScalingUpFactor = 1024;
1918 
1919  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1920  unsigned UnpredCost;
1921  if (!Subtarget.hasBranchPredictor()) {
1922  // When we don't have a branch predictor it's always cheaper to not take a
1923  // branch than take it, so we have to take that into account.
1924  unsigned NotTakenBranchCost = 1;
1925  unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1926  unsigned TUnpredCycles, FUnpredCycles;
1927  if (!FCycles) {
1928  // Triangle: TBB is the fallthrough
1929  TUnpredCycles = TCycles + NotTakenBranchCost;
1930  FUnpredCycles = TakenBranchCost;
1931  } else {
1932  // Diamond: TBB is the block that is branched to, FBB is the fallthrough
1933  TUnpredCycles = TCycles + TakenBranchCost;
1934  FUnpredCycles = FCycles + NotTakenBranchCost;
1935  // The branch at the end of FBB will disappear when it's predicated, so
1936  // discount it from PredCost.
1937  PredCost -= 1 * ScalingUpFactor;
1938  }
1939  // The total cost is the cost of each path scaled by their probabilities
1940  unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
1941  unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
1942  UnpredCost = TUnpredCost + FUnpredCost;
1943  // When predicating, assume that the first IT can be folded away but later
1944  // ones cost one cycle each.
1945  if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
1946  PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
1947  }
1948  } else {
1949  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
1950  unsigned FUnpredCost =
1951  Probability.getCompl().scale(FCycles * ScalingUpFactor);
1952  UnpredCost = TUnpredCost + FUnpredCost;
1953  UnpredCost += 1 * ScalingUpFactor; // The branch itself
1954  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
1955  }
1956 
1957  return PredCost <= UnpredCost;
1958 }
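// Worked example (illustrative, with assumed numbers, not from the source):
// for a diamond with TCycles = FCycles = 2, no extra predication cycles, a
// 50% branch probability and a branch predictor present,
//   PredCost   = (2 + 2) * 1024 = 4096
//   UnpredCost = 0.5*2*1024 + 0.5*2*1024 + 1024 (the branch)
//                + MispredictionPenalty * 1024 / 10.
// Assuming a penalty of 8, UnpredCost is roughly 2048 + 1024 + 819 = 3891,
// which is below PredCost, so if-conversion is rejected.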
1959 
1960 bool
1961 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1962  MachineBasicBlock &FMBB) const {
1963  // Reduce false anti-dependencies to let the target's out-of-order execution
1964  // engine do its thing.
1965  return Subtarget.isProfitableToUnpredicate();
1966 }
1967 
1968 /// getInstrPredicate - If instruction is predicated, returns its predicate
1969 /// condition, otherwise returns AL. It also returns the condition code
1970 /// register by reference.
1971 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
1972  unsigned &PredReg) {
1973  int PIdx = MI.findFirstPredOperandIdx();
1974  if (PIdx == -1) {
1975  PredReg = 0;
1976  return ARMCC::AL;
1977  }
1978 
1979  PredReg = MI.getOperand(PIdx+1).getReg();
1980  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
1981 }
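// Illustrative usage sketch (not part of the original source): querying the
// predicate of an instruction before touching it, assuming `MI` is an
// ARM/Thumb2 MachineInstr reference in scope.
//
//   unsigned PredReg = 0;
//   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
//   if (Pred != ARMCC::AL)
//     ; // MI is conditionally executed and PredReg (normally CPSR) is read.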
1982 
1983 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
1984  if (Opc == ARM::B)
1985  return ARM::Bcc;
1986  if (Opc == ARM::tB)
1987  return ARM::tBcc;
1988  if (Opc == ARM::t2B)
1989  return ARM::t2Bcc;
1990 
1991  llvm_unreachable("Unknown unconditional branch opcode!");
1992 }
1993 
1994 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
1995  bool NewMI,
1996  unsigned OpIdx1,
1997  unsigned OpIdx2) const {
1998  switch (MI.getOpcode()) {
1999  case ARM::MOVCCr:
2000  case ARM::t2MOVCCr: {
2001  // MOVCC can be commuted by inverting the condition.
2002  unsigned PredReg = 0;
2003  ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2004  // MOVCC AL can't be inverted. Shouldn't happen.
2005  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2006  return nullptr;
2007  MachineInstr *CommutedMI =
2008  TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2009  if (!CommutedMI)
2010  return nullptr;
2011  // After swapping the MOVCC operands, also invert the condition.
2012  CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2013  .setImm(ARMCC::getOppositeCondition(CC));
2014  return CommutedMI;
2015  }
2016  }
2017  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2018 }
2019 
2020 /// Identify instructions that can be folded into a MOVCC instruction, and
2021 /// return the defining instruction.
2022 static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
2023  const MachineRegisterInfo &MRI,
2024  const TargetInstrInfo *TII) {
2025  if (!TargetRegisterInfo::isVirtualRegister(Reg))
2026  return nullptr;
2027  if (!MRI.hasOneNonDBGUse(Reg))
2028  return nullptr;
2029  MachineInstr *MI = MRI.getVRegDef(Reg);
2030  if (!MI)
2031  return nullptr;
2032  // MI is folded into the MOVCC by predicating it.
2033  if (!MI->isPredicable())
2034  return nullptr;
2035  // Check if MI has any non-dead defs or physreg uses. This also detects
2036  // predicated instructions which will be reading CPSR.
2037  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
2038  const MachineOperand &MO = MI->getOperand(i);
2039  // Reject frame index operands, PEI can't handle the predicated pseudos.
2040  if (MO.isFI() || MO.isCPI() || MO.isJTI())
2041  return nullptr;
2042  if (!MO.isReg())
2043  continue;
2044  // MI can't have any tied operands, that would conflict with predication.
2045  if (MO.isTied())
2046  return nullptr;
2047  if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
2048  return nullptr;
2049  if (MO.isDef() && !MO.isDead())
2050  return nullptr;
2051  }
2052  bool DontMoveAcrossStores = true;
2053  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2054  return nullptr;
2055  return MI;
2056 }
2057 
2058 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2059  SmallVectorImpl<MachineOperand> &Cond,
2060  unsigned &TrueOp, unsigned &FalseOp,
2061  bool &Optimizable) const {
2062  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2063  "Unknown select instruction");
2064  // MOVCC operands:
2065  // 0: Def.
2066  // 1: True use.
2067  // 2: False use.
2068  // 3: Condition code.
2069  // 4: CPSR use.
2070  TrueOp = 1;
2071  FalseOp = 2;
2072  Cond.push_back(MI.getOperand(3));
2073  Cond.push_back(MI.getOperand(4));
2074  // We can always fold a def.
2075  Optimizable = true;
2076  return false;
2077 }
2078 
2079 MachineInstr *
2080 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2081  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2082  bool PreferFalse) const {
2083  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2084  "Unknown select instruction");
2085  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2086  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2087  bool Invert = !DefMI;
2088  if (!DefMI)
2089  DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2090  if (!DefMI)
2091  return nullptr;
2092 
2093  // Find new register class to use.
2094  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2095  unsigned DestReg = MI.getOperand(0).getReg();
2096  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
2097  if (!MRI.constrainRegClass(DestReg, PreviousClass))
2098  return nullptr;
2099 
2100  // Create a new predicated version of DefMI.
2101  // Rfalse is the first use.
2102  MachineInstrBuilder NewMI =
2103  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2104 
2105  // Copy all the DefMI operands, excluding its (null) predicate.
2106  const MCInstrDesc &DefDesc = DefMI->getDesc();
2107  for (unsigned i = 1, e = DefDesc.getNumOperands();
2108  i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2109  NewMI.add(DefMI->getOperand(i));
2110 
2111  unsigned CondCode = MI.getOperand(3).getImm();
2112  if (Invert)
2113  NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2114  else
2115  NewMI.addImm(CondCode);
2116  NewMI.add(MI.getOperand(4));
2117 
2118  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2119  if (NewMI->hasOptionalDef())
2120  NewMI.add(condCodeOp());
2121 
2122  // The output register value when the predicate is false is an implicit
2123  // register operand tied to the first def.
2124  // The tie makes the register allocator ensure the FalseReg is allocated the
2125  // same register as operand 0.
2126  FalseReg.setImplicit();
2127  NewMI.add(FalseReg);
2128  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2129 
2130  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2131  SeenMIs.insert(NewMI);
2132  SeenMIs.erase(DefMI);
2133 
2134  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2135  // DefMI would be invalid when transferred inside the loop. Checking for a
2136  // loop is expensive, but at least remove kill flags if they are in different
2137  // BBs.
2138  if (DefMI->getParent() != MI.getParent())
2139  NewMI->clearKillInfo();
2140 
2141  // The caller will erase MI, but not DefMI.
2142  DefMI->eraseFromParent();
2143  return NewMI;
2144 }
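// Illustrative sketch of the rewrite (pseudo-MIR with assumed operands, not
// taken from the source): a single-use def feeding a select such as
//
//   %3 = ADDri %1, 1, 14, %noreg, %noreg
//   %4 = MOVCCr %2, %3, 1, %cpsr
//
// is replaced by one ADDri predicated on the MOVCC's condition (inverted when
// the def feeds the other operand), with %2 appended as an implicit use tied
// to the result so the register allocator assigns it the same register as %4.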
2145 
2146 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2147 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2148 /// def operand.
2149 ///
2150 /// This will go away once we can teach tblgen how to set the optional CPSR def
2151 /// operand itself.
2152 struct AddSubFlagsOpcodePair {
2153  uint16_t PseudoOpc;
2154  uint16_t MachineOpc;
2155 };
2156 
2157 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2158  {ARM::ADDSri, ARM::ADDri},
2159  {ARM::ADDSrr, ARM::ADDrr},
2160  {ARM::ADDSrsi, ARM::ADDrsi},
2161  {ARM::ADDSrsr, ARM::ADDrsr},
2162 
2163  {ARM::SUBSri, ARM::SUBri},
2164  {ARM::SUBSrr, ARM::SUBrr},
2165  {ARM::SUBSrsi, ARM::SUBrsi},
2166  {ARM::SUBSrsr, ARM::SUBrsr},
2167 
2168  {ARM::RSBSri, ARM::RSBri},
2169  {ARM::RSBSrsi, ARM::RSBrsi},
2170  {ARM::RSBSrsr, ARM::RSBrsr},
2171 
2172  {ARM::tADDSi3, ARM::tADDi3},
2173  {ARM::tADDSi8, ARM::tADDi8},
2174  {ARM::tADDSrr, ARM::tADDrr},
2175  {ARM::tADCS, ARM::tADC},
2176 
2177  {ARM::tSUBSi3, ARM::tSUBi3},
2178  {ARM::tSUBSi8, ARM::tSUBi8},
2179  {ARM::tSUBSrr, ARM::tSUBrr},
2180  {ARM::tSBCS, ARM::tSBC},
2181 
2182  {ARM::t2ADDSri, ARM::t2ADDri},
2183  {ARM::t2ADDSrr, ARM::t2ADDrr},
2184  {ARM::t2ADDSrs, ARM::t2ADDrs},
2185 
2186  {ARM::t2SUBSri, ARM::t2SUBri},
2187  {ARM::t2SUBSrr, ARM::t2SUBrr},
2188  {ARM::t2SUBSrs, ARM::t2SUBrs},
2189 
2190  {ARM::t2RSBSri, ARM::t2RSBri},
2191  {ARM::t2RSBSrs, ARM::t2RSBrs},
2192 };
2193 
2194 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2195  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2196  if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2197  return AddSubFlagsOpcodeMap[i].MachineOpc;
2198  return 0;
2199 }
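// Illustrative usage (not in the original source): mapping a flag-setting
// pseudo back to its real encoding.
//
//   unsigned Real = llvm::convertAddSubFlagsOpcode(ARM::t2SUBSri); // ARM::t2SUBri
//   if (!Real)
//     ; // OldOpc was not one of the ADDS/SUBS/RSBS pseudos in the table above.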
2200 
2201 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2202  MachineBasicBlock::iterator &MBBI,
2203  const DebugLoc &dl, unsigned DestReg,
2204  unsigned BaseReg, int NumBytes,
2205  ARMCC::CondCodes Pred, unsigned PredReg,
2206  const ARMBaseInstrInfo &TII,
2207  unsigned MIFlags) {
2208  if (NumBytes == 0 && DestReg != BaseReg) {
2209  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2210  .addReg(BaseReg, RegState::Kill)
2211  .add(predOps(Pred, PredReg))
2212  .add(condCodeOp())
2213  .setMIFlags(MIFlags);
2214  return;
2215  }
2216 
2217  bool isSub = NumBytes < 0;
2218  if (isSub) NumBytes = -NumBytes;
2219 
2220  while (NumBytes) {
2221  unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2222  unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2223  assert(ThisVal && "Didn't extract field correctly");
2224 
2225  // We will handle these bits from offset, clear them.
2226  NumBytes &= ~ThisVal;
2227 
2228  assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2229 
2230  // Build the new ADD / SUB.
2231  unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2232  BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2233  .addReg(BaseReg, RegState::Kill)
2234  .addImm(ThisVal)
2235  .add(predOps(Pred, PredReg))
2236  .add(condCodeOp())
2237  .setMIFlags(MIFlags);
2238  BaseReg = DestReg;
2239  }
2240 }
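// Worked example (illustrative): NumBytes = 0x101 cannot be encoded as a
// single ARM modified immediate (an 8-bit value rotated right by an even
// amount), so the loop above emits two ADDri instructions whose immediates
// are encodable chunks of the offset, roughly
//
//   add DestReg, BaseReg, #0x1
//   add DestReg, DestReg, #0x100
//
// (the exact chunk order depends on ARM_AM::getSOImmValRotate).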
2241 
2242 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2243  MachineFunction &MF, MachineInstr *MI,
2244  unsigned NumBytes) {
2245  // This optimisation potentially adds lots of load and store
2246  // micro-operations, it's only really a great benefit to code-size.
2247  if (!MF.getFunction().optForMinSize())
2248  return false;
2249 
2250  // If only one register is pushed/popped, LLVM can use an LDR/STR
2251  // instead. We can't modify those so make sure we're dealing with an
2252  // instruction we understand.
2253  bool IsPop = isPopOpcode(MI->getOpcode());
2254  bool IsPush = isPushOpcode(MI->getOpcode());
2255  if (!IsPush && !IsPop)
2256  return false;
2257 
2258  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2259  MI->getOpcode() == ARM::VLDMDIA_UPD;
2260  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2261  MI->getOpcode() == ARM::tPOP ||
2262  MI->getOpcode() == ARM::tPOP_RET;
2263 
2264  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2265  MI->getOperand(1).getReg() == ARM::SP)) &&
2266  "trying to fold sp update into non-sp-updating push/pop");
2267 
2268  // The VFP push & pop act on D-registers, so we can only correctly fold in
2269  // an adjustment that is a multiple of 8 bytes. Similarly, GPRs are 4 bytes.
2270  // Don't try if this is violated.
2271  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2272  return false;
2273 
2274  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2275  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2276  int RegListIdx = IsT1PushPop ? 2 : 4;
2277 
2278  // Calculate the space we'll need in terms of registers.
2279  unsigned RegsNeeded;
2280  const TargetRegisterClass *RegClass;
2281  if (IsVFPPushPop) {
2282  RegsNeeded = NumBytes / 8;
2283  RegClass = &ARM::DPRRegClass;
2284  } else {
2285  RegsNeeded = NumBytes / 4;
2286  RegClass = &ARM::GPRRegClass;
2287  }
2288 
2289  // We're going to have to strip all list operands off before
2290  // re-adding them since the order matters, so save the existing ones
2291  // for later.
2292  SmallVector<MachineOperand, 4> RegList;
2293 
2294  // We're also going to need the first register transferred by this
2295  // instruction, which won't necessarily be the first register in the list.
2296  unsigned FirstRegEnc = -1;
2297 
2298  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2299  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2300  MachineOperand &MO = MI->getOperand(i);
2301  RegList.push_back(MO);
2302 
2303  if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2304  FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2305  }
2306 
2307  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2308 
2309  // Now try to find enough space in the reglist to allocate NumBytes.
2310  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2311  --CurRegEnc) {
2312  unsigned CurReg = RegClass->getRegister(CurRegEnc);
2313  if (!IsPop) {
2314  // Pushing any register is completely harmless, mark the register involved
2315  // as undef since we don't care about its value and must not restore it
2316  // during stack unwinding.
2317  RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2318  false, false, true));
2319  --RegsNeeded;
2320  continue;
2321  }
2322 
2323  // However, we can only pop an extra register if it's not live. For
2324  // registers live within the function we might clobber a return value
2325  // register; the other way a register can be live here is if it's
2326  // callee-saved.
2327  if (isCalleeSavedRegister(CurReg, CSRegs) ||
2328  MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2329  MachineBasicBlock::LQR_Dead) {
2330  // VFP pops don't allow holes in the register list, so any skip is fatal
2331  // for our transformation. GPR pops do, so we should just keep looking.
2332  if (IsVFPPushPop)
2333  return false;
2334  else
2335  continue;
2336  }
2337 
2338  // Mark the unimportant registers as <def,dead> in the POP.
2339  RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2340  true));
2341  --RegsNeeded;
2342  }
2343 
2344  if (RegsNeeded > 0)
2345  return false;
2346 
2347  // Finally we know we can profitably perform the optimisation so go
2348  // ahead: strip all existing registers off and add them back again
2349  // in the right order.
2350  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2351  MI->RemoveOperand(i);
2352 
2353  // Add the complete list back in.
2354  MachineInstrBuilder MIB(MF, &*MI);
2355  for (int i = RegList.size() - 1; i >= 0; --i)
2356  MIB.add(RegList[i]);
2357 
2358  return true;
2359 }
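// Illustrative sketch (pseudo-asm, not from the source): when optimising for
// minimum size, a stack adjustment such as
//
//   push {r4, r5}
//   sub  sp, sp, #8
//
// can be absorbed by pushing two extra registers with lower encodings as
// undef scratch values,
//
//   push {r2, r3, r4, r5}
//
// while the matching pop instead appends the scratch registers as def-dead,
// allowing the caller to delete the explicit SP update.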
2360 
2361 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2362  unsigned FrameReg, int &Offset,
2363  const ARMBaseInstrInfo &TII) {
2364  unsigned Opcode = MI.getOpcode();
2365  const MCInstrDesc &Desc = MI.getDesc();
2366  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2367  bool isSub = false;
2368 
2369  // Memory operands in inline assembly always use AddrMode2.
2370  if (Opcode == ARM::INLINEASM)
2371  AddrMode = ARMII::AddrMode2;
2372 
2373  if (Opcode == ARM::ADDri) {
2374  Offset += MI.getOperand(FrameRegIdx+1).getImm();
2375  if (Offset == 0) {
2376  // Turn it into a move.
2377  MI.setDesc(TII.get(ARM::MOVr));
2378  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2379  MI.RemoveOperand(FrameRegIdx+1);
2380  Offset = 0;
2381  return true;
2382  } else if (Offset < 0) {
2383  Offset = -Offset;
2384  isSub = true;
2385  MI.setDesc(TII.get(ARM::SUBri));
2386  }
2387 
2388  // Common case: small offset, fits into instruction.
2389  if (ARM_AM::getSOImmVal(Offset) != -1) {
2390  // Replace the FrameIndex with sp / fp
2391  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2392  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2393  Offset = 0;
2394  return true;
2395  }
2396 
2397  // Otherwise, pull as much of the immediate into this ADDri/SUBri
2398  // as possible.
2399  unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2400  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2401 
2402  // We will handle these bits from offset, clear them.
2403  Offset &= ~ThisImmVal;
2404 
2405  // Get the properly encoded SOImmVal field.
2406  assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2407  "Bit extraction didn't work?");
2408  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2409  } else {
2410  unsigned ImmIdx = 0;
2411  int InstrOffs = 0;
2412  unsigned NumBits = 0;
2413  unsigned Scale = 1;
2414  switch (AddrMode) {
2415  case ARMII::AddrMode_i12:
2416  ImmIdx = FrameRegIdx + 1;
2417  InstrOffs = MI.getOperand(ImmIdx).getImm();
2418  NumBits = 12;
2419  break;
2420  case ARMII::AddrMode2:
2421  ImmIdx = FrameRegIdx+2;
2422  InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2423  if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2424  InstrOffs *= -1;
2425  NumBits = 12;
2426  break;
2427  case ARMII::AddrMode3:
2428  ImmIdx = FrameRegIdx+2;
2429  InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2430  if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2431  InstrOffs *= -1;
2432  NumBits = 8;
2433  break;
2434  case ARMII::AddrMode4:
2435  case ARMII::AddrMode6:
2436  // Can't fold any offset even if it's zero.
2437  return false;
2438  case ARMII::AddrMode5:
2439  ImmIdx = FrameRegIdx+1;
2440  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2441  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2442  InstrOffs *= -1;
2443  NumBits = 8;
2444  Scale = 4;
2445  break;
2446  case ARMII::AddrMode5FP16:
2447  ImmIdx = FrameRegIdx+1;
2448  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2449  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2450  InstrOffs *= -1;
2451  NumBits = 8;
2452  Scale = 2;
2453  break;
2454  default:
2455  llvm_unreachable("Unsupported addressing mode!");
2456  }
2457 
2458  Offset += InstrOffs * Scale;
2459  assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2460  if (Offset < 0) {
2461  Offset = -Offset;
2462  isSub = true;
2463  }
2464 
2465  // Attempt to fold address comp. if opcode has offset bits
2466  if (NumBits > 0) {
2467  // Common case: small offset, fits into instruction.
2468  MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2469  int ImmedOffset = Offset / Scale;
2470  unsigned Mask = (1 << NumBits) - 1;
2471  if ((unsigned)Offset <= Mask * Scale) {
2472  // Replace the FrameIndex with sp
2473  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2474  // FIXME: When addrmode2 goes away, this will simplify (like the
2475  // T2 version), as the LDR.i12 versions don't need the encoding
2476  // tricks for the offset value.
2477  if (isSub) {
2478  if (AddrMode == ARMII::AddrMode_i12)
2479  ImmedOffset = -ImmedOffset;
2480  else
2481  ImmedOffset |= 1 << NumBits;
2482  }
2483  ImmOp.ChangeToImmediate(ImmedOffset);
2484  Offset = 0;
2485  return true;
2486  }
2487 
2488  // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2489  ImmedOffset = ImmedOffset & Mask;
2490  if (isSub) {
2491  if (AddrMode == ARMII::AddrMode_i12)
2492  ImmedOffset = -ImmedOffset;
2493  else
2494  ImmedOffset |= 1 << NumBits;
2495  }
2496  ImmOp.ChangeToImmediate(ImmedOffset);
2497  Offset &= ~(Mask*Scale);
2498  }
2499  }
2500 
2501  Offset = (isSub) ? -Offset : Offset;
2502  return Offset == 0;
2503 }
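// Worked example (illustrative, assumed values): for an ARM ADDri whose
// frame-index operand is being rewritten with FrameReg = SP and a combined
// Offset of 8, the value 8 is encodable as a modified immediate, so the frame
// index is replaced by SP, the immediate becomes 8, Offset is reset to 0 and
// the function returns true (no further fix-up is needed).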
2504 
2505 /// analyzeCompare - For a comparison instruction, return the source registers
2506 /// in SrcReg and SrcReg2 if it has two register operands, and the value it
2507 /// compares against in CmpValue. Return true if the comparison instruction
2508 /// can be analyzed.
2509 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
2510  unsigned &SrcReg2, int &CmpMask,
2511  int &CmpValue) const {
2512  switch (MI.getOpcode()) {
2513  default: break;
2514  case ARM::CMPri:
2515  case ARM::t2CMPri:
2516  case ARM::tCMPi8:
2517  SrcReg = MI.getOperand(0).getReg();
2518  SrcReg2 = 0;
2519  CmpMask = ~0;
2520  CmpValue = MI.getOperand(1).getImm();
2521  return true;
2522  case ARM::CMPrr:
2523  case ARM::t2CMPrr:
2524  SrcReg = MI.getOperand(0).getReg();
2525  SrcReg2 = MI.getOperand(1).getReg();
2526  CmpMask = ~0;
2527  CmpValue = 0;
2528  return true;
2529  case ARM::TSTri:
2530  case ARM::t2TSTri:
2531  SrcReg = MI.getOperand(0).getReg();
2532  SrcReg2 = 0;
2533  CmpMask = MI.getOperand(1).getImm();
2534  CmpValue = 0;
2535  return true;
2536  }
2537 
2538  return false;
2539 }
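// Illustrative result (not in the original source): for "t2CMPri %1, 42" the
// hook reports SrcReg = %1, SrcReg2 = 0, CmpMask = ~0, CmpValue = 42 and
// returns true; opcodes outside the switch (e.g. floating-point compares)
// return false.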
2540 
2541 /// isSuitableForMask - Identify a suitable 'and' instruction that
2542 /// operates on the given source register and applies the same mask
2543 /// as a 'tst' instruction. Provide a limited look-through for copies.
2544 /// When successful, MI will hold the found instruction.
2545 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2546  int CmpMask, bool CommonUse) {
2547  switch (MI->getOpcode()) {
2548  case ARM::ANDri:
2549  case ARM::t2ANDri:
2550  if (CmpMask != MI->getOperand(2).getImm())
2551  return false;
2552  if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2553  return true;
2554  break;
2555  }
2556 
2557  return false;
2558 }
2559 
2560 /// getSwappedCondition - assume the flags are set by MI(a,b), return
2561 /// the condition code if we modify the instructions such that flags are
2562 /// set by MI(b,a).
2563 inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
2564  switch (CC) {
2565  default: return ARMCC::AL;
2566  case ARMCC::EQ: return ARMCC::EQ;
2567  case ARMCC::NE: return ARMCC::NE;
2568  case ARMCC::HS: return ARMCC::LS;
2569  case ARMCC::LO: return ARMCC::HI;
2570  case ARMCC::HI: return ARMCC::LO;
2571  case ARMCC::LS: return ARMCC::HS;
2572  case ARMCC::GE: return ARMCC::LE;
2573  case ARMCC::LT: return ARMCC::GT;
2574  case ARMCC::GT: return ARMCC::LT;
2575  case ARMCC::LE: return ARMCC::GE;
2576  }
2577 }
2578 
2579 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2580 /// the condition code if we modify the instructions such that flags are
2581 /// set by ADD(a,b,X).
2582 inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2583  switch (CC) {
2584  default: return ARMCC::AL;
2585  case ARMCC::HS: return ARMCC::LO;
2586  case ARMCC::LO: return ARMCC::HS;
2587  case ARMCC::VS: return ARMCC::VS;
2588  case ARMCC::VC: return ARMCC::VC;
2589  }
2590 }
2591 
2592 /// isRedundantFlagInstr - check whether the first instruction, whose only
2593 /// purpose is to update flags, can be made redundant.
2594 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2595 /// CMPri can be made redundant by SUBri if the operands are the same.
2596 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2597 /// This function can be extended later on.
2598 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2599  unsigned SrcReg, unsigned SrcReg2,
2600  int ImmValue, const MachineInstr *OI) {
2601  if ((CmpI->getOpcode() == ARM::CMPrr ||
2602  CmpI->getOpcode() == ARM::t2CMPrr) &&
2603  (OI->getOpcode() == ARM::SUBrr ||
2604  OI->getOpcode() == ARM::t2SUBrr) &&
2605  ((OI->getOperand(1).getReg() == SrcReg &&
2606  OI->getOperand(2).getReg() == SrcReg2) ||
2607  (OI->getOperand(1).getReg() == SrcReg2 &&
2608  OI->getOperand(2).getReg() == SrcReg)))
2609  return true;
2610 
2611  if ((CmpI->getOpcode() == ARM::CMPri ||
2612  CmpI->getOpcode() == ARM::t2CMPri) &&
2613  (OI->getOpcode() == ARM::SUBri ||
2614  OI->getOpcode() == ARM::t2SUBri) &&
2615  OI->getOperand(1).getReg() == SrcReg &&
2616  OI->getOperand(2).getImm() == ImmValue)
2617  return true;
2618 
2619  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2620  (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2621  OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2622  OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2623  OI->getOperand(0).getReg() == SrcReg &&
2624  OI->getOperand(1).getReg() == SrcReg2)
2625  return true;
2626  return false;
2627 }
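// Illustrative sketch (pseudo-MIR, not from the source): given
//
//   %2 = SUBrr %0, %1, ...
//   CMPrr %0, %1, implicit-def %cpsr
//
// the compare is redundant: optimizeCompareInstr below can set the optional
// CPSR def on the SUBrr (making it flag-setting) and erase the CMPrr.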
2628 
2629 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2630  switch (MI->getOpcode()) {
2631  default: return false;
2632  case ARM::tLSLri:
2633  case ARM::tLSRri:
2634  case ARM::tLSLrr:
2635  case ARM::tLSRrr:
2636  case ARM::tSUBrr:
2637  case ARM::tADDrr:
2638  case ARM::tADDi3:
2639  case ARM::tADDi8:
2640  case ARM::tSUBi3:
2641  case ARM::tSUBi8:
2642  case ARM::tMUL:
2643  IsThumb1 = true;
2644  LLVM_FALLTHROUGH;
2645  case ARM::RSBrr:
2646  case ARM::RSBri:
2647  case ARM::RSCrr:
2648  case ARM::RSCri:
2649  case ARM::ADDrr:
2650  case ARM::ADDri:
2651  case ARM::ADCrr:
2652  case ARM::ADCri:
2653  case ARM::SUBrr:
2654  case ARM::SUBri:
2655  case ARM::SBCrr:
2656  case ARM::SBCri:
2657  case ARM::t2RSBri:
2658  case ARM::t2ADDrr:
2659  case ARM::t2ADDri:
2660  case ARM::t2ADCrr:
2661  case ARM::t2ADCri:
2662  case ARM::t2SUBrr:
2663  case ARM::t2SUBri:
2664  case ARM::t2SBCrr:
2665  case ARM::t2SBCri:
2666  case ARM::ANDrr:
2667  case ARM::ANDri:
2668  case ARM::t2ANDrr:
2669  case ARM::t2ANDri:
2670  case ARM::ORRrr:
2671  case ARM::ORRri:
2672  case ARM::t2ORRrr:
2673  case ARM::t2ORRri:
2674  case ARM::EORrr:
2675  case ARM::EORri:
2676  case ARM::t2EORrr:
2677  case ARM::t2EORri:
2678  case ARM::t2LSRri:
2679  case ARM::t2LSRrr:
2680  case ARM::t2LSLri:
2681  case ARM::t2LSLrr:
2682  return true;
2683  }
2684 }
2685 
2686 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2687 /// comparison into one that sets the zero bit in the flags register;
2688 /// Remove a redundant Compare instruction if an earlier instruction can set the
2689 /// flags in the same way as Compare.
2690 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2691 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2692 /// condition code of instructions which use the flags.
2693 bool ARMBaseInstrInfo::optimizeCompareInstr(
2694  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
2695  int CmpValue, const MachineRegisterInfo *MRI) const {
2696  // Get the unique definition of SrcReg.
2697  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2698  if (!MI) return false;
2699 
2700  // Masked compares sometimes use the same register as the corresponding 'and'.
2701  if (CmpMask != ~0) {
2702  if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2703  MI = nullptr;
2704  for (MachineRegisterInfo::use_instr_iterator
2705  UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2706  UI != UE; ++UI) {
2707  if (UI->getParent() != CmpInstr.getParent())
2708  continue;
2709  MachineInstr *PotentialAND = &*UI;
2710  if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2711  isPredicated(*PotentialAND))
2712  continue;
2713  MI = PotentialAND;
2714  break;
2715  }
2716  if (!MI) return false;
2717  }
2718  }
2719 
2720  // Get ready to iterate backward from CmpInstr.
2721  MachineBasicBlock::iterator I = CmpInstr, E = MI,
2722  B = CmpInstr.getParent()->begin();
2723 
2724  // Early exit if CmpInstr is at the beginning of the BB.
2725  if (I == B) return false;
2726 
2727  // There are two possible candidates which can be changed to set CPSR:
2728  // One is MI, the other is a SUB or ADD instruction.
2729  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2730  // ADDr[ri](r1, r2, X).
2731  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2732  MachineInstr *SubAdd = nullptr;
2733  if (SrcReg2 != 0)
2734  // MI is not a candidate for CMPrr.
2735  MI = nullptr;
2736  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2737  // Conservatively refuse to convert an instruction which isn't in the same
2738  // BB as the comparison.
2739  // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2740  // Thus we cannot return here.
2741  if (CmpInstr.getOpcode() == ARM::CMPri ||
2742  CmpInstr.getOpcode() == ARM::t2CMPri)
2743  MI = nullptr;
2744  else
2745  return false;
2746  }
2747 
2748  bool IsThumb1 = false;
2749  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2750  return false;
2751 
2752  // We also want to do this peephole for cases like this: if (a*b == 0),
2753  // and optimise away the CMP instruction from the generated code sequence:
2754  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2755  // resulting from the select instruction, but these MOVS instructions for
2756  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2757  // However, if we only have MOVS instructions in between the CMP and the
2758  // other instruction (the MULS in this example), then the CPSR is dead so we
2759  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2760  // reordering and then continue the analysis hoping we can eliminate the
2761  // CMP. This peephole works on the vregs, so is still in SSA form. As a
2762  // consequence, the movs won't redefine/kill the MUL operands which would
2763  // make this reordering illegal.
2764  if (MI && IsThumb1) {
2765  --I;
2766  bool CanReorder = true;
2767  const bool HasStmts = I != E;
2768  for (; I != E; --I) {
2769  if (I->getOpcode() != ARM::tMOVi8) {
2770  CanReorder = false;
2771  break;
2772  }
2773  }
2774  if (HasStmts && CanReorder) {
2775  MI = MI->removeFromParent();
2776  E = CmpInstr;
2777  CmpInstr.getParent()->insert(E, MI);
2778  }
2779  I = CmpInstr;
2780  E = MI;
2781  }
2782 
2783  // Check that CPSR isn't set between the comparison instruction and the one we
2784  // want to change. At the same time, search for SubAdd.
2785  const TargetRegisterInfo *TRI = &getRegisterInfo();
2786  do {
2787  const MachineInstr &Instr = *--I;
2788 
2789  // Check whether CmpInstr can be made redundant by the current instruction.
2790  if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
2791  SubAdd = &*I;
2792  break;
2793  }
2794 
2795  // Allow E (which was initially MI) to be SubAdd but do not search before E.
2796  if (I == E)
2797  break;
2798 
2799  if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2800  Instr.readsRegister(ARM::CPSR, TRI))
2801  // This instruction modifies or uses CPSR after the one we want to
2802  // change. We can't do this transformation.
2803  return false;
2804 
2805  } while (I != B);
2806 
2807  // Return false if no candidates exist.
2808  if (!MI && !SubAdd)
2809  return false;
2810 
2811  // The single candidate is called MI.
2812  if (!MI) MI = SubAdd;
2813 
2814  // We can't use a predicated instruction - it doesn't always write the flags.
2815  if (isPredicated(*MI))
2816  return false;
2817 
2818  // Scan forward for the use of CPSR
2819  // When checking against MI: if it's a conditional code that requires
2820  // checking of the V bit or C bit, then this is not safe to do.
2821  // It is safe to remove CmpInstr if CPSR is redefined or killed.
2822  // If we are done with the basic block, we need to check whether CPSR is
2823  // live-out.
2824  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
2825  OperandsToUpdate;
2826  bool isSafe = false;
2827  I = CmpInstr;
2828  E = CmpInstr.getParent()->end();
2829  while (!isSafe && ++I != E) {
2830  const MachineInstr &Instr = *I;
2831  for (unsigned IO = 0, EO = Instr.getNumOperands();
2832  !isSafe && IO != EO; ++IO) {
2833  const MachineOperand &MO = Instr.getOperand(IO);
2834  if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2835  isSafe = true;
2836  break;
2837  }
2838  if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2839  continue;
2840  if (MO.isDef()) {
2841  isSafe = true;
2842  break;
2843  }
2844  // Condition code is after the operand before CPSR except for VSELs.
2845  ARMCC::CondCodes CC;
2846  bool IsInstrVSel = true;
2847  switch (Instr.getOpcode()) {
2848  default:
2849  IsInstrVSel = false;
2850  CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
2851  break;
2852  case ARM::VSELEQD:
2853  case ARM::VSELEQS:
2854  CC = ARMCC::EQ;
2855  break;
2856  case ARM::VSELGTD:
2857  case ARM::VSELGTS:
2858  CC = ARMCC::GT;
2859  break;
2860  case ARM::VSELGED:
2861  case ARM::VSELGES:
2862  CC = ARMCC::GE;
2863  break;
2864  case ARM::VSELVSS:
2865  case ARM::VSELVSD:
2866  CC = ARMCC::VS;
2867  break;
2868  }
2869 
2870  if (SubAdd) {
2871  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2872  // on CMP needs to be updated to be based on SUB.
2873  // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
2874  // needs to be modified.
2875  // Push the condition code operands to OperandsToUpdate.
2876  // If it is safe to remove CmpInstr, the condition code of these
2877  // operands will be modified.
2878  unsigned Opc = SubAdd->getOpcode();
2879  bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
2880  Opc == ARM::SUBri || Opc == ARM::t2SUBri;
2881  if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
2882  SubAdd->getOperand(2).getReg() == SrcReg)) {
2883  // VSel doesn't support condition code update.
2884  if (IsInstrVSel)
2885  return false;
2886  // Ensure we can swap the condition.
2887  ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
2888  if (NewCC == ARMCC::AL)
2889  return false;
2890  OperandsToUpdate.push_back(
2891  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
2892  }
2893  } else {
2894  // No SubAdd, so this is x = <op> y, z; cmp x, 0.
2895  switch (CC) {
2896  case ARMCC::EQ: // Z
2897  case ARMCC::NE: // Z
2898  case ARMCC::MI: // N
2899  case ARMCC::PL: // N
2900  case ARMCC::AL: // none
2901  // CPSR can be used multiple times, we should continue.
2902  break;
2903  case ARMCC::HS: // C
2904  case ARMCC::LO: // C
2905  case ARMCC::VS: // V
2906  case ARMCC::VC: // V
2907  case ARMCC::HI: // C Z
2908  case ARMCC::LS: // C Z
2909  case ARMCC::GE: // N V
2910  case ARMCC::LT: // N V
2911  case ARMCC::GT: // Z N V
2912  case ARMCC::LE: // Z N V
2913  // The instruction uses the V bit or C bit which is not safe.
2914  return false;
2915  }
2916  }
2917  }
2918  }
2919 
2920  // If CPSR is not killed nor re-defined, we should check whether it is
2921  // live-out. If it is live-out, do not optimize.
2922  if (!isSafe) {
2923  MachineBasicBlock *MBB = CmpInstr.getParent();
2924  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2925  SE = MBB->succ_end(); SI != SE; ++SI)
2926  if ((*SI)->isLiveIn(ARM::CPSR))
2927  return false;
2928  }
2929 
2930  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
2931  // set CPSR so this is represented as an explicit output)
2932  if (!IsThumb1) {
2933  MI->getOperand(5).setReg(ARM::CPSR);
2934  MI->getOperand(5).setIsDef(true);
2935  }
2936  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
2937  CmpInstr.eraseFromParent();
2938 
2939  // Modify the condition code of operands in OperandsToUpdate.
2940  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2941  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2942  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
2943  OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
2944 
2945  return true;
2946 }
2947 
2948 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
2949  // Do not sink MI if it might be used to optimize a redundant compare.
2950  // We heuristically only look at the instruction immediately following MI to
2951  // avoid potentially searching the entire basic block.
2952  if (isPredicated(MI))
2953  return true;
2954  MachineBasicBlock::const_iterator Next = &MI;
2955  ++Next;
2956  unsigned SrcReg, SrcReg2;
2957  int CmpMask, CmpValue;
2958  if (Next != MI.getParent()->end() &&
2959  analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
2960  isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
2961  return false;
2962  return true;
2963 }
2964 
2965 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2966  unsigned Reg,
2967  MachineRegisterInfo *MRI) const {
2968  // Fold large immediates into add, sub, or, xor.
2969  unsigned DefOpc = DefMI.getOpcode();
2970  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
2971  return false;
2972  if (!DefMI.getOperand(1).isImm())
2973  // Could be t2MOVi32imm @xx
2974  return false;
2975 
2976  if (!MRI->hasOneNonDBGUse(Reg))
2977  return false;
2978 
2979  const MCInstrDesc &DefMCID = DefMI.getDesc();
2980  if (DefMCID.hasOptionalDef()) {
2981  unsigned NumOps = DefMCID.getNumOperands();
2982  const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
2983  if (MO.getReg() == ARM::CPSR && !MO.isDead())
2984  // If DefMI defines CPSR and it is not dead, it's obviously not safe
2985  // to delete DefMI.
2986  return false;
2987  }
2988 
2989  const MCInstrDesc &UseMCID = UseMI.getDesc();
2990  if (UseMCID.hasOptionalDef()) {
2991  unsigned NumOps = UseMCID.getNumOperands();
2992  if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
2993  // If the instruction sets the flag, do not attempt this optimization
2994  // since it may change the semantics of the code.
2995  return false;
2996  }
2997 
2998  unsigned UseOpc = UseMI.getOpcode();
2999  unsigned NewUseOpc = 0;
3000  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3001  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3002  bool Commute = false;
3003  switch (UseOpc) {
3004  default: return false;
3005  case ARM::SUBrr:
3006  case ARM::ADDrr:
3007  case ARM::ORRrr:
3008  case ARM::EORrr:
3009  case ARM::t2SUBrr:
3010  case ARM::t2ADDrr:
3011  case ARM::t2ORRrr:
3012  case ARM::t2EORrr: {
3013  Commute = UseMI.getOperand(2).getReg() != Reg;
3014  switch (UseOpc) {
3015  default: break;
3016  case ARM::ADDrr:
3017  case ARM::SUBrr:
3018  if (UseOpc == ARM::SUBrr && Commute)
3019  return false;
3020 
3021  // ADD/SUB are special because they're essentially the same operation, so
3022  // we can handle a larger range of immediates.
3023  if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3024  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3025  else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3026  ImmVal = -ImmVal;
3027  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3028  } else
3029  return false;
3030  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3031  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3032  break;
3033  case ARM::ORRrr:
3034  case ARM::EORrr:
3035  if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3036  return false;
3037  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3038  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3039  switch (UseOpc) {
3040  default: break;
3041  case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3042  case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3043  }
3044  break;
3045  case ARM::t2ADDrr:
3046  case ARM::t2SUBrr:
3047  if (UseOpc == ARM::t2SUBrr && Commute)
3048  return false;
3049 
3050  // ADD/SUB are special because they're essentially the same operation, so
3051  // we can handle a larger range of immediates.
3052  if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3053  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
3054  else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3055  ImmVal = -ImmVal;
3056  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
3057  } else
3058  return false;
3059  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3060  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3061  break;
3062  case ARM::t2ORRrr:
3063  case ARM::t2EORrr:
3064  if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3065  return false;
3066  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3067  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3068  switch (UseOpc) {
3069  default: break;
3070  case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3071  case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3072  }
3073  break;
3074  }
3075  }
3076  }
3077 
3078  unsigned OpIdx = Commute ? 2 : 1;
3079  unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
3080  bool isKill = UseMI.getOperand(OpIdx).isKill();
3081  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
3082  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3083  NewReg)
3084  .addReg(Reg1, getKillRegState(isKill))
3085  .addImm(SOImmValV1)
3086  .add(predOps(ARMCC::AL))
3087  .add(condCodeOp());
3088  UseMI.setDesc(get(NewUseOpc));
3089  UseMI.getOperand(1).setReg(NewReg);
3090  UseMI.getOperand(1).setIsKill();
3091  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3092  DefMI.eraseFromParent();
3093  return true;
3094 }
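// Illustrative sketch (pseudo-MIR, assumed values, not from the source): a
// 32-bit constant that is a "two part" shifter immediate, e.g.
//
//   %1 = MOVi32imm 0xAB00CD
//   %2 = ADDrr %0, %1
//
// is folded into two immediate adds and the MOVi32imm is erased:
//
//   %3 = ADDri %0, <first part>
//   %2 = ADDri %3, <second part>
//
// (the two parts are produced by ARM_AM::getSOImmTwoPartFirst/Second).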
3095 
3096 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3097  const MachineInstr &MI) {
3098  switch (MI.getOpcode()) {
3099  default: {
3100  const MCInstrDesc &Desc = MI.getDesc();
3101  int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3102  assert(UOps >= 0 && "bad # UOps");
3103  return UOps;
3104  }
3105 
3106  case ARM::LDRrs:
3107  case ARM::LDRBrs:
3108  case ARM::STRrs:
3109  case ARM::STRBrs: {
3110  unsigned ShOpVal = MI.getOperand(3).getImm();
3111  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3112  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3113  if (!isSub &&
3114  (ShImm == 0 ||
3115  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3116  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3117  return 1;
3118  return 2;
3119  }
3120 
3121  case ARM::LDRH:
3122  case ARM::STRH: {
3123  if (!MI.getOperand(2).getReg())
3124  return 1;
3125 
3126  unsigned ShOpVal = MI.getOperand(3).getImm();
3127  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3128  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3129  if (!isSub &&
3130  (ShImm == 0 ||
3131  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3132  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3133  return 1;
3134  return 2;
3135  }
3136 
3137  case ARM::LDRSB:
3138  case ARM::LDRSH:
3139  return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3140 
3141  case ARM::LDRSB_POST:
3142  case ARM::LDRSH_POST: {
3143  unsigned Rt = MI.getOperand(0).getReg();
3144  unsigned Rm = MI.getOperand(3).getReg();
3145  return (Rt == Rm) ? 4 : 3;
3146  }
3147 
3148  case ARM::LDR_PRE_REG:
3149  case ARM::LDRB_PRE_REG: {
3150  unsigned Rt = MI.getOperand(0).getReg();
3151  unsigned Rm = MI.getOperand(3).getReg();
3152  if (Rt == Rm)
3153  return 3;
3154  unsigned ShOpVal = MI.getOperand(4).getImm();
3155  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3156  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3157  if (!isSub &&
3158  (ShImm == 0 ||
3159  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3160  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3161  return 2;
3162  return 3;
3163  }
3164 
3165  case ARM::STR_PRE_REG:
3166  case ARM::STRB_PRE_REG: {
3167  unsigned ShOpVal = MI.getOperand(4).getImm();
3168  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3169  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3170  if (!isSub &&
3171  (ShImm == 0 ||
3172  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3173  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3174  return 2;
3175  return 3;
3176  }
3177 
3178  case ARM::LDRH_PRE:
3179  case ARM::STRH_PRE: {
3180  unsigned Rt = MI.getOperand(0).getReg();
3181  unsigned Rm = MI.getOperand(3).getReg();
3182  if (!Rm)
3183  return 2;
3184  if (Rt == Rm)
3185  return 3;
3186  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3187  }
3188 
3189  case ARM::LDR_POST_REG:
3190  case ARM::LDRB_POST_REG:
3191  case ARM::LDRH_POST: {
3192  unsigned Rt = MI.getOperand(0).getReg();
3193  unsigned Rm = MI.getOperand(3).getReg();
3194  return (Rt == Rm) ? 3 : 2;
3195  }
3196 
3197  case ARM::LDR_PRE_IMM:
3198  case ARM::LDRB_PRE_IMM:
3199  case ARM::LDR_POST_IMM:
3200  case ARM::LDRB_POST_IMM:
3201  case ARM::STRB_POST_IMM:
3202  case ARM::STRB_POST_REG:
3203  case ARM::STRB_PRE_IMM:
3204  case ARM::STRH_POST:
3205  case ARM::STR_POST_IMM:
3206  case ARM::STR_POST_REG:
3207  case ARM::STR_PRE_IMM:
3208  return 2;
3209 
3210  case ARM::LDRSB_PRE:
3211  case ARM::LDRSH_PRE: {
3212  unsigned Rm = MI.getOperand(3).getReg();
3213  if (Rm == 0)
3214  return 3;
3215  unsigned Rt = MI.getOperand(0).getReg();
3216  if (Rt == Rm)
3217  return 4;
3218  unsigned ShOpVal = MI.getOperand(4).getImm();
3219  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3220  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3221  if (!isSub &&
3222  (ShImm == 0 ||
3223  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3224  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3225  return 3;
3226  return 4;
3227  }
3228 
3229  case ARM::LDRD: {
3230  unsigned Rt = MI.getOperand(0).getReg();
3231  unsigned Rn = MI.getOperand(2).getReg();
3232  unsigned Rm = MI.getOperand(3).getReg();
3233  if (Rm)
3234  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3235  : 3;
3236  return (Rt == Rn) ? 3 : 2;
3237  }
3238 
3239  case ARM::STRD: {
3240  unsigned Rm = MI.getOperand(3).getReg();
3241  if (Rm)
3242  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3243  : 3;
3244  return 2;
3245  }
3246 
3247  case ARM::LDRD_POST:
3248  case ARM::t2LDRD_POST:
3249  return 3;
3250 
3251  case ARM::STRD_POST:
3252  case ARM::t2STRD_POST:
3253  return 4;
3254 
3255  case ARM::LDRD_PRE: {
3256  unsigned Rt = MI.getOperand(0).getReg();
3257  unsigned Rn = MI.getOperand(3).getReg();
3258  unsigned Rm = MI.getOperand(4).getReg();
3259  if (Rm)
3260  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3261  : 4;
3262  return (Rt == Rn) ? 4 : 3;
3263  }
3264 
3265  case ARM::t2LDRD_PRE: {
3266  unsigned Rt = MI.getOperand(0).getReg();
3267  unsigned Rn = MI.getOperand(3).getReg();
3268  return (Rt == Rn) ? 4 : 3;
3269  }
3270 
3271  case ARM::STRD_PRE: {
3272  unsigned Rm = MI.getOperand(4).getReg();
3273  if (Rm)
3274  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3275  : 4;
3276  return 3;
3277  }
3278 
3279  case ARM::t2STRD_PRE:
3280  return 3;
3281 
3282  case ARM::t2LDR_POST:
3283  case ARM::t2LDRB_POST:
3284  case ARM::t2LDRB_PRE:
3285  case ARM::t2LDRSBi12:
3286  case ARM::t2LDRSBi8:
3287  case ARM::t2LDRSBpci:
3288  case ARM::t2LDRSBs:
3289  case ARM::t2LDRH_POST:
3290  case ARM::t2LDRH_PRE:
3291  case ARM::t2LDRSBT:
3292  case ARM::t2LDRSB_POST:
3293  case ARM::t2LDRSB_PRE:
3294  case ARM::t2LDRSH_POST:
3295  case ARM::t2LDRSH_PRE:
3296  case ARM::t2LDRSHi12:
3297  case ARM::t2LDRSHi8:
3298  case ARM::t2LDRSHpci:
3299  case ARM::t2LDRSHs:
3300  return 2;
3301 
3302  case ARM::t2LDRDi8: {
3303  unsigned Rt = MI.getOperand(0).getReg();
3304  unsigned Rn = MI.getOperand(2).getReg();
3305  return (Rt == Rn) ? 3 : 2;
3306  }
3307 
3308  case ARM::t2STRB_POST:
3309  case ARM::t2STRB_PRE:
3310  case ARM::t2STRBs:
3311  case ARM::t2STRDi8:
3312  case ARM::t2STRH_POST:
3313  case ARM::t2STRH_PRE:
3314  case ARM::t2STRHs:
3315  case ARM::t2STR_POST:
3316  case ARM::t2STR_PRE:
3317  case ARM::t2STRs:
3318  return 2;
3319  }
3320 }
3321 
3322 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3323 // can't be easily determined return 0 (missing MachineMemOperand).
3324 //
3325 // FIXME: The current MachineInstr design does not support relying on machine
3326 // mem operands to determine the width of a memory access. Instead, we expect
3327 // the target to provide this information based on the instruction opcode and
3328 // operands. However, using MachineMemOperand is the best solution now for
3329 // two reasons:
3330 //
3331 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3332 // operands. This is much more dangerous than using the MachineMemOperand
3333 // sizes because CodeGen passes can insert/remove optional machine operands. In
3334 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3335 // postRA passes as well.
3336 //
3337 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3338 // machine model that calls this should handle the unknown (zero size) case.
3339 //
3340 // Long term, we should require a target hook that verifies MachineMemOperand
3341 // sizes during MC lowering. That target hook should be local to MC lowering
3342 // because we can't ensure that it is aware of other MI forms. Doing this will
3343 // ensure that MachineMemOperands are correctly propagated through all passes.
3344 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3345  unsigned Size = 0;
3346  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3347  E = MI.memoperands_end();
3348  I != E; ++I) {
3349  Size += (*I)->getSize();
3350  }
3351  return Size / 4;
3352 }
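// Example (illustrative): an LDMIA whose single MachineMemOperand reports a
// 16-byte access is counted as 4 LDM addresses; if no memory operands are
// attached, the function conservatively returns 0.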
3353 
3354 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3355  unsigned NumRegs) {
3356  unsigned UOps = 1 + NumRegs; // 1 for address computation.
3357  switch (Opc) {
3358  default:
3359  break;
3360  case ARM::VLDMDIA_UPD:
3361  case ARM::VLDMDDB_UPD:
3362  case ARM::VLDMSIA_UPD:
3363  case ARM::VLDMSDB_UPD:
3364  case ARM::VSTMDIA_UPD:
3365  case ARM::VSTMDDB_UPD:
3366  case ARM::VSTMSIA_UPD:
3367  case ARM::VSTMSDB_UPD:
3368  case ARM::LDMIA_UPD:
3369  case ARM::LDMDA_UPD:
3370  case ARM::LDMDB_UPD:
3371  case ARM::LDMIB_UPD:
3372  case ARM::STMIA_UPD:
3373  case ARM::STMDA_UPD:
3374  case ARM::STMDB_UPD:
3375  case ARM::STMIB_UPD:
3376  case ARM::tLDMIA_UPD:
3377  case ARM::tSTMIA_UPD:
3378  case ARM::t2LDMIA_UPD:
3379  case ARM::t2LDMDB_UPD:
3380  case ARM::t2STMIA_UPD:
3381  case ARM::t2STMDB_UPD:
3382  ++UOps; // One for base register writeback.
3383  break;
3384  case ARM::LDMIA_RET:
3385  case ARM::tPOP_RET:
3386  case ARM::t2LDMIA_RET:
3387  UOps += 2; // One for base reg wb, one for write to pc.
3388  break;
3389  }
3390  return UOps;
3391 }
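// Worked example (illustrative): a tPOP_RET restoring three registers costs
// 1 (address computation) + 3 (registers) + 2 (base register writeback and
// write to pc) = 6 micro-ops under this single-issue model.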
3392 
3393 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3394  const MachineInstr &MI) const {
3395  if (!ItinData || ItinData->isEmpty())
3396  return 1;
3397 
3398  const MCInstrDesc &Desc = MI.getDesc();
3399  unsigned Class = Desc.getSchedClass();
3400  int ItinUOps = ItinData->getNumMicroOps(Class);
3401  if (ItinUOps >= 0) {
3402  if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3403  return getNumMicroOpsSwiftLdSt(ItinData, MI);
3404 
3405  return ItinUOps;
3406  }
3407 
3408  unsigned Opc = MI.getOpcode();
3409  switch (Opc) {
3410  default:
3411  llvm_unreachable("Unexpected multi-uops instruction!");
3412  case ARM::VLDMQIA:
3413  case ARM::VSTMQIA:
3414  return 2;
3415 
3416  // The number of uOps for load / store multiple is determined by the number
3417  // of registers.
3418  //
3419  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3420  // same cycle. The scheduling for the first load / store must be done
3421  // separately by assuming the address is not 64-bit aligned.
3422  //
3423  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3424  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3425  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3426  case ARM::VLDMDIA:
3427  case ARM::VLDMDIA_UPD:
3428  case ARM::VLDMDDB_UPD:
3429  case ARM::VLDMSIA:
3430  case ARM::VLDMSIA_UPD:
3431  case ARM::VLDMSDB_UPD:
3432  case ARM::VSTMDIA:
3433  case ARM::VSTMDIA_UPD:
3434  case ARM::VSTMDDB_UPD:
3435  case ARM::VSTMSIA:
3436  case ARM::VSTMSIA_UPD:
3437  case ARM::VSTMSDB_UPD: {
3438  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3439  return (NumRegs / 2) + (NumRegs % 2) + 1;
3440  }
3441 
3442  case ARM::LDMIA_RET:
3443  case ARM::LDMIA:
3444  case ARM::LDMDA:
3445  case ARM::LDMDB:
3446  case ARM::LDMIB:
3447  case ARM::LDMIA_UPD:
3448  case ARM::LDMDA_UPD:
3449  case ARM::LDMDB_UPD:
3450  case ARM::LDMIB_UPD:
3451  case ARM::STMIA:
3452  case ARM::STMDA:
3453  case ARM::STMDB:
3454  case ARM::STMIB:
3455  case ARM::STMIA_UPD:
3456  case ARM::STMDA_UPD:
3457  case ARM::STMDB_UPD:
3458  case ARM::STMIB_UPD:
3459  case ARM::tLDMIA:
3460  case ARM::tLDMIA_UPD:
3461  case ARM::tSTMIA_UPD:
3462  case ARM::tPOP_RET:
3463  case ARM::tPOP:
3464  case ARM::tPUSH:
3465  case ARM::t2LDMIA_RET:
3466  case ARM::t2LDMIA:
3467  case ARM::t2LDMDB:
3468  case ARM::t2LDMIA_UPD:
3469  case ARM::t2LDMDB_UPD:
3470  case ARM::t2STMIA:
3471  case ARM::t2STMDB:
3472  case ARM::t2STMIA_UPD:
3473  case ARM::t2STMDB_UPD: {
3474  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3475  switch (Subtarget.getLdStMultipleTiming()) {
3476  case ARMSubtarget::SingleIssuePlusExtras:
3477  return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3478  case ARMSubtarget::SingleIssue:
3479  // Assume the worst.
3480  return NumRegs;
3481  case ARMSubtarget::DoubleIssue: {
3482  if (NumRegs < 4)
3483  return 2;
3484  // 4 registers would be issued: 2, 2.
3485  // 5 registers would be issued: 2, 2, 1.
3486  unsigned UOps = (NumRegs / 2);
3487  if (NumRegs % 2)
3488  ++UOps;
3489  return UOps;
3490  }
3491  case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3492  unsigned UOps = (NumRegs / 2);
3493  // If there is an odd number of registers or if it's not 64-bit aligned,
3494  // then it takes an extra AGU (Address Generation Unit) cycle.
3495  if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3496  (*MI.memoperands_begin())->getAlignment() < 8)
3497  ++UOps;
3498  return UOps;
3499  }
3500  }
3501  }
3502  }
3503  llvm_unreachable("Didn't find the number of microops");
3504 }
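// ---- Editor's note: illustrative sketch, not part of ARMBaseInstrInfo.cpp ----
// The two dual-issue cases of the switch above boil down to small formulas:
// pairs of registers issue together, with a floor of 2 uops on the A8-style
// path and an extra AGU cycle on the A9-style path when the register count is
// odd or the base address is not 64-bit aligned. A standalone rendering with
// hypothetical names; `Aligned64` stands in for the single-memory-operand,
// >= 8-byte-alignment test:
static unsigned doubleIssueUOps(unsigned NumRegs) {
  if (NumRegs < 4)
    return 2;
  return NumRegs / 2 + NumRegs % 2;
}

static unsigned doubleIssueCheckAlignUOps(unsigned NumRegs, bool Aligned64) {
  unsigned UOps = NumRegs / 2;
  if ((NumRegs % 2) || !Aligned64)
    ++UOps; // odd register count or unaligned base costs one extra AGU cycle
  return UOps;
}
// e.g. doubleIssueUOps(5) == 3 (issued 2, 2, 1);
//      doubleIssueCheckAlignUOps(4, /*Aligned64=*/false) == 3.
// ---- end editor's note ----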
3505 
3506 int
3507 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3508  const MCInstrDesc &DefMCID,
3509  unsigned DefClass,
3510  unsigned DefIdx, unsigned DefAlign) const {
3511  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3512  if (RegNo <= 0)
3513  // Def is the address writeback.
3514  return ItinData->getOperandCycle(DefClass, DefIdx);
3515 
3516  int DefCycle;
3517  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3518  // (regno / 2) + (regno % 2) + 1
3519  DefCycle = RegNo / 2 + 1;
3520  if (RegNo % 2)
3521  ++DefCycle;
3522  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3523  DefCycle = RegNo;
3524  bool isSLoad = false;
3525 
3526  switch (DefMCID.getOpcode()) {
3527  default: break;
3528  case ARM::VLDMSIA:
3529  case ARM::VLDMSIA_UPD:
3530  case ARM::VLDMSDB_UPD:
3531  isSLoad = true;
3532  break;
3533  }
3534 
3535  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3536  // then it takes an extra cycle.
3537  if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3538  ++DefCycle;
3539  } else {
3540  // Assume the worst.
3541  DefCycle = RegNo + 2;
3542  }
3543 
3544  return DefCycle;
3545 }
3546 
3548  unsigned BaseReg = MI.getOperand(0).getReg();
3549  for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
3550  const auto &Op = MI.getOperand(i);
3551  if (Op.isReg() && Op.getReg() == BaseReg)
3552  return true;
3553  }
3554  return false;
3555 }
3556 unsigned
3558  // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops
3559  // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops)
3560  return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
3561 }
3562 
3563 int
3564 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3565  const MCInstrDesc &DefMCID,
3566  unsigned DefClass,
3567  unsigned DefIdx, unsigned DefAlign) const {
3568  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3569  if (RegNo <= 0)
3570  // Def is the address writeback.
3571  return ItinData->getOperandCycle(DefClass, DefIdx);
3572 
3573  int DefCycle;
3574  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3575  // 4 registers would be issued: 1, 2, 1.
3576  // 5 registers would be issued: 1, 2, 2.
3577  DefCycle = RegNo / 2;
3578  if (DefCycle < 1)
3579  DefCycle = 1;
3580  // Result latency is issue cycle + 2: E2.
3581  DefCycle += 2;
3582  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3583  DefCycle = (RegNo / 2);
3584  // If there is an odd number of registers or if it's not 64-bit aligned,
3585  // then it takes an extra AGU (Address Generation Unit) cycle.
3586  if ((RegNo % 2) || DefAlign < 8)
3587  ++DefCycle;
3588  // Result latency is AGU cycles + 2.
3589  DefCycle += 2;
3590  } else {
3591  // Assume the worst.
3592  DefCycle = RegNo + 2;
3593  }
3594 
3595  return DefCycle;
3596 }
3597 
3598 int
3599 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3600  const MCInstrDesc &UseMCID,
3601  unsigned UseClass,
3602  unsigned UseIdx, unsigned UseAlign) const {
3603  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3604  if (RegNo <= 0)
3605  return ItinData->getOperandCycle(UseClass, UseIdx);
3606 
3607  int UseCycle;
3608  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3609  // (regno / 2) + (regno % 2) + 1
3610  UseCycle = RegNo / 2 + 1;
3611  if (RegNo % 2)
3612  ++UseCycle;
3613  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3614  UseCycle = RegNo;
3615  bool isSStore = false;
3616 
3617  switch (UseMCID.getOpcode()) {
3618  default: break;
3619  case ARM::VSTMSIA:
3620  case ARM::VSTMSIA_UPD:
3621  case ARM::VSTMSDB_UPD:
3622  isSStore = true;
3623  break;
3624  }
3625 
3626  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3627  // then it takes an extra cycle.
3628  if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3629  ++UseCycle;
3630  } else {
3631  // Assume the worst.
3632  UseCycle = RegNo + 2;
3633  }
3634 
3635  return UseCycle;
3636 }
3637 
3638 int
3639 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3640  const MCInstrDesc &UseMCID,
3641  unsigned UseClass,
3642  unsigned UseIdx, unsigned UseAlign) const {
3643  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3644  if (RegNo <= 0)
3645  return ItinData->getOperandCycle(UseClass, UseIdx);
3646 
3647  int UseCycle;
3648  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3649  UseCycle = RegNo / 2;
3650  if (UseCycle < 2)
3651  UseCycle = 2;
3652  // Read in E3.
3653  UseCycle += 2;
3654  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3655  UseCycle = (RegNo / 2);
3656  // If there is an odd number of registers or if it's not 64-bit aligned,
3657  // then it takes an extra AGU (Address Generation Unit) cycle.
3658  if ((RegNo % 2) || UseAlign < 8)
3659  ++UseCycle;
3660  } else {
3661  // Assume the worst.
3662  UseCycle = 1;
3663  }
3664  return UseCycle;
3665 }
3666 
3667 int
3668 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3669  const MCInstrDesc &DefMCID,
3670  unsigned DefIdx, unsigned DefAlign,
3671  const MCInstrDesc &UseMCID,
3672  unsigned UseIdx, unsigned UseAlign) const {
3673  unsigned DefClass = DefMCID.getSchedClass();
3674  unsigned UseClass = UseMCID.getSchedClass();
3675 
3676  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3677  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3678 
3679  // This may be a def / use of a variable_ops instruction; the operand
3680  // latency might be determinable dynamically. Let the target try to
3681  // figure it out.
3682  int DefCycle = -1;
3683  bool LdmBypass = false;
3684  switch (DefMCID.getOpcode()) {
3685  default:
3686  DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3687  break;
3688 
3689  case ARM::VLDMDIA:
3690  case ARM::VLDMDIA_UPD:
3691  case ARM::VLDMDDB_UPD:
3692  case ARM::VLDMSIA:
3693  case ARM::VLDMSIA_UPD:
3694  case ARM::VLDMSDB_UPD:
3695  DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3696  break;
3697 
3698  case ARM::LDMIA_RET:
3699  case ARM::LDMIA:
3700  case ARM::LDMDA:
3701  case ARM::LDMDB:
3702  case ARM::LDMIB:
3703  case ARM::LDMIA_UPD:
3704  case ARM::LDMDA_UPD:
3705  case ARM::LDMDB_UPD:
3706  case ARM::LDMIB_UPD:
3707  case ARM::tLDMIA:
3708  case ARM::tLDMIA_UPD:
3709  case ARM::tPUSH:
3710  case ARM::t2LDMIA_RET:
3711  case ARM::t2LDMIA:
3712  case ARM::t2LDMDB:
3713  case ARM::t2LDMIA_UPD:
3714  case ARM::t2LDMDB_UPD:
3715  LdmBypass = true;
3716  DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3717  break;
3718  }
3719 
3720  if (DefCycle == -1)
3721  // We can't seem to determine the result latency of the def; assume it's 2.
3722  DefCycle = 2;
3723 
3724  int UseCycle = -1;
3725  switch (UseMCID.getOpcode()) {
3726  default:
3727  UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3728  break;
3729 
3730  case ARM::VSTMDIA:
3731  case ARM::VSTMDIA_UPD:
3732  case ARM::VSTMDDB_UPD:
3733  case ARM::VSTMSIA:
3734  case ARM::VSTMSIA_UPD:
3735  case ARM::VSTMSDB_UPD:
3736  UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3737  break;
3738 
3739  case ARM::STMIA:
3740  case ARM::STMDA:
3741  case ARM::STMDB:
3742  case ARM::STMIB:
3743  case ARM::STMIA_UPD:
3744  case ARM::STMDA_UPD:
3745  case ARM::STMDB_UPD:
3746  case ARM::STMIB_UPD:
3747  case ARM::tSTMIA_UPD:
3748  case ARM::tPOP_RET:
3749  case ARM::tPOP:
3750  case ARM::t2STMIA:
3751  case ARM::t2STMDB:
3752  case ARM::t2STMIA_UPD:
3753  case ARM::t2STMDB_UPD:
3754  UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3755  break;
3756  }
3757 
3758  if (UseCycle == -1)
3759  // Assume it's read in the first stage.
3760  UseCycle = 1;
3761 
3762  UseCycle = DefCycle - UseCycle + 1;
3763  if (UseCycle > 0) {
3764  if (LdmBypass) {
3765  // It's a variable_ops instruction so we can't use DefIdx here. Just use
3766  // the first def operand.
3767  if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3768  UseClass, UseIdx))
3769  --UseCycle;
3770  } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3771  UseClass, UseIdx)) {
3772  --UseCycle;
3773  }
3774  }
3775 
3776  return UseCycle;
3777 }
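// ---- Editor's note: illustrative sketch, not part of ARMBaseInstrInfo.cpp ----
// The tail of getOperandLatency above combines the def and use cycles as
// DefCycle - UseCycle + 1, falling back to 2 and 1 when the itinerary has no
// answer, and shaving one cycle when a forwarding path exists. A minimal
// standalone version, with `HasForwarding` standing in for the
// hasPipelineForwarding() query:
static int combineDefUseCycles(int DefCycle, int UseCycle, bool HasForwarding) {
  if (DefCycle == -1)
    DefCycle = 2; // unknown def latency: assume 2
  if (UseCycle == -1)
    UseCycle = 1; // unknown use cycle: assume read in the first stage
  int Latency = DefCycle - UseCycle + 1;
  if (Latency > 0 && HasForwarding)
    --Latency;    // a bypass saves one cycle
  return Latency;
}
// e.g. a def ready in cycle 4 feeding an operand read in cycle 2 gives
// combineDefUseCycles(4, 2, false) == 3, or 2 with forwarding.
// ---- end editor's note ----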
3778 
3779 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3780  const MachineInstr *MI, unsigned Reg,
3781  unsigned &DefIdx, unsigned &Dist) {
3782  Dist = 0;
3783 
3784  MachineBasicBlock::const_iterator I = MI; ++I;
3785  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
3786  assert(II->isInsideBundle() && "Empty bundle?");
3787 
3788  int Idx = -1;
3789  while (II->isInsideBundle()) {
3790  Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3791  if (Idx != -1)
3792  break;
3793  --II;
3794  ++Dist;
3795  }
3796 
3797  assert(Idx != -1 && "Cannot find bundled definition!");
3798  DefIdx = Idx;
3799  return &*II;
3800 }
3801 
3802 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3803  const MachineInstr &MI, unsigned Reg,
3804  unsigned &UseIdx, unsigned &Dist) {
3805  Dist = 0;
3806 
3807  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
3808  assert(II->isInsideBundle() && "Empty bundle?");
3809  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
3810 
3811  // FIXME: This doesn't properly handle multiple uses.
3812  int Idx = -1;
3813  while (II != E && II->isInsideBundle()) {
3814  Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3815  if (Idx != -1)
3816  break;
3817  if (II->getOpcode() != ARM::t2IT)
3818  ++Dist;
3819  ++II;
3820  }
3821 
3822  if (Idx == -1) {
3823  Dist = 0;
3824  return nullptr;
3825  }
3826 
3827  UseIdx = Idx;
3828  return &*II;
3829 }
3830 
3831 /// Return the number of cycles to add to (or subtract from) the static
3832 /// itinerary based on the def opcode and alignment. The caller will ensure that
3833 /// adjusted latency is at least one cycle.
3834 static int adjustDefLatency(const ARMSubtarget &Subtarget,
3835  const MachineInstr &DefMI,
3836  const MCInstrDesc &DefMCID, unsigned DefAlign) {
3837  int Adjust = 0;
3838  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
3839  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3840  // variants are one cycle cheaper.
3841  switch (DefMCID.getOpcode()) {
3842  default: break;
3843  case ARM::LDRrs:
3844  case ARM::LDRBrs: {
3845  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3846  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3847  if (ShImm == 0 ||
3848  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3849  --Adjust;
3850  break;
3851  }
3852  case ARM::t2LDRs:
3853  case ARM::t2LDRBs:
3854  case ARM::t2LDRHs:
3855  case ARM::t2LDRSHs: {
3856  // Thumb2 mode: lsl only.
3857  unsigned ShAmt = DefMI.getOperand(3).getImm();
3858  if (ShAmt == 0 || ShAmt == 2)
3859  --Adjust;
3860  break;
3861  }
3862  }
3863  } else if (Subtarget.isSwift()) {
3864  // FIXME: Properly handle all of the latency adjustments for address
3865  // writeback.
3866  switch (DefMCID.getOpcode()) {
3867  default: break;
3868  case ARM::LDRrs:
3869  case ARM::LDRBrs: {
3870  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3871  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3872  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3873  if (!isSub &&
3874  (ShImm == 0 ||
3875  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3876  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3877  Adjust -= 2;
3878  else if (!isSub &&
3879  ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3880  --Adjust;
3881  break;
3882  }
3883  case ARM::t2LDRs:
3884  case ARM::t2LDRBs:
3885  case ARM::t2LDRHs:
3886  case ARM::t2LDRSHs: {
3887  // Thumb2 mode: lsl only.
3888  unsigned ShAmt = DefMI.getOperand(3).getImm();
3889  if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
3890  Adjust -= 2;
3891  break;
3892  }
3893  }
3894  }
3895 
3896  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
3897  switch (DefMCID.getOpcode()) {
3898  default: break;
3899  case ARM::VLD1q8:
3900  case ARM::VLD1q16:
3901  case ARM::VLD1q32:
3902  case ARM::VLD1q64:
3903  case ARM::VLD1q8wb_fixed:
3904  case ARM::VLD1q16wb_fixed:
3905  case ARM::VLD1q32wb_fixed:
3906  case ARM::VLD1q64wb_fixed:
3907  case ARM::VLD1q8wb_register:
3908  case ARM::VLD1q16wb_register:
3909  case ARM::VLD1q32wb_register:
3910  case ARM::VLD1q64wb_register:
3911  case ARM::VLD2d8:
3912  case ARM::VLD2d16:
3913  case ARM::VLD2d32:
3914  case ARM::VLD2q8:
3915  case ARM::VLD2q16:
3916  case ARM::VLD2q32:
3917  case ARM::VLD2d8wb_fixed:
3918  case ARM::VLD2d16wb_fixed:
3919  case ARM::VLD2d32wb_fixed:
3920  case ARM::VLD2q8wb_fixed:
3921  case ARM::VLD2q16wb_fixed:
3922  case ARM::VLD2q32wb_fixed:
3923  case ARM::VLD2d8wb_register:
3924  case ARM::VLD2d16wb_register:
3925  case ARM::VLD2d32wb_register:
3926  case ARM::VLD2q8wb_register:
3927  case ARM::VLD2q16wb_register:
3928  case ARM::VLD2q32wb_register:
3929  case ARM::VLD3d8:
3930  case ARM::VLD3d16:
3931  case ARM::VLD3d32:
3932  case ARM::VLD1d64T:
3933  case ARM::VLD3d8_UPD:
3934  case ARM::VLD3d16_UPD:
3935  case ARM::VLD3d32_UPD:
3936  case ARM::VLD1d64Twb_fixed:
3937  case ARM::VLD1d64Twb_register:
3938  case ARM::VLD3q8_UPD:
3939  case ARM::VLD3q16_UPD:
3940  case ARM::VLD3q32_UPD:
3941  case ARM::VLD4d8:
3942  case ARM::VLD4d16:
3943  case ARM::VLD4d32:
3944  case ARM::VLD1d64Q:
3945  case ARM::VLD4d8_UPD:
3946  case ARM::VLD4d16_UPD:
3947  case ARM::VLD4d32_UPD:
3948  case ARM::VLD1d64Qwb_fixed:
3949  case ARM::VLD1d64Qwb_register:
3950  case ARM::VLD4q8_UPD:
3951  case ARM::VLD4q16_UPD:
3952  case ARM::VLD4q32_UPD:
3953  case ARM::VLD1DUPq8:
3954  case ARM::VLD1DUPq16:
3955  case ARM::VLD1DUPq32:
3956  case ARM::VLD1DUPq8wb_fixed:
3957  case ARM::VLD1DUPq16wb_fixed:
3958  case ARM::VLD1DUPq32wb_fixed:
3959  case ARM::VLD1DUPq8wb_register:
3960  case ARM::VLD1DUPq16wb_register:
3961  case ARM::VLD1DUPq32wb_register:
3962  case ARM::VLD2DUPd8:
3963  case ARM::VLD2DUPd16:
3964  case ARM::VLD2DUPd32:
3965  case ARM::VLD2DUPd8wb_fixed:
3966  case ARM::VLD2DUPd16wb_fixed:
3967  case ARM::VLD2DUPd32wb_fixed:
3968  case ARM::VLD2DUPd8wb_register:
3969  case ARM::VLD2DUPd16wb_register:
3970  case ARM::VLD2DUPd32wb_register:
3971  case ARM::VLD4DUPd8:
3972  case ARM::VLD4DUPd16:
3973  case ARM::VLD4DUPd32:
3974  case ARM::VLD4DUPd8_UPD:
3975  case ARM::VLD4DUPd16_UPD:
3976  case ARM::VLD4DUPd32_UPD:
3977  case ARM::VLD1LNd8:
3978  case ARM::VLD1LNd16:
3979  case ARM::VLD1LNd32:
3980  case ARM::VLD1LNd8_UPD:
3981  case ARM::VLD1LNd16_UPD:
3982  case ARM::VLD1LNd32_UPD:
3983  case ARM::VLD2LNd8:
3984  case ARM::VLD2LNd16:
3985  case ARM::VLD2LNd32:
3986  case ARM::VLD2LNq16:
3987  case ARM::VLD2LNq32:
3988  case ARM::VLD2LNd8_UPD:
3989  case ARM::VLD2LNd16_UPD:
3990  case ARM::VLD2LNd32_UPD:
3991  case ARM::VLD2LNq16_UPD:
3992  case ARM::VLD2LNq32_UPD:
3993  case ARM::VLD4LNd8:
3994  case ARM::VLD4LNd16:
3995  case ARM::VLD4LNd32:
3996  case ARM::VLD4LNq16:
3997  case ARM::VLD4LNq32:
3998  case ARM::VLD4LNd8_UPD:
3999  case ARM::VLD4LNd16_UPD:
4000  case ARM::VLD4LNd32_UPD:
4001  case ARM::VLD4LNq16_UPD:
4002  case ARM::VLD4LNq32_UPD:
4003  // If the address is not 64-bit aligned, the latencies of these
4004  // instructions increase by one.
4005  ++Adjust;
4006  break;
4007  }
4008  }
4009  return Adjust;
4010 }
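// ---- Editor's note: illustrative sketch, not part of ARMBaseInstrInfo.cpp ----
// On the A8/A9-class cores, the shifter-op branch above only rewards the
// "free" addressing forms: no shift, or lsl #2. A condensed standalone
// predicate over an already-decoded shift amount and kind (hypothetical
// helper; the real code also distinguishes add/sub and Swift's wider set of
// cheap shifts):
enum class ShiftKind { None, Lsl, Lsr };

static int a9StyleLoadAdjust(unsigned ShImm, ShiftKind Kind) {
  // [r +/- r] and [r + r << 2] variants are one cycle cheaper.
  if (ShImm == 0 || (ShImm == 2 && Kind == ShiftKind::Lsl))
    return -1;
  return 0;
}
// e.g. an LDR with an "lsl #2" index gets a -1 cycle adjustment; "lsl #1"
// gets none on A8/A9, although the Swift branch above treats it as cheap.
// ---- end editor's note ----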
4011 
4012 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4013  const MachineInstr &DefMI,
4014  unsigned DefIdx,
4015  const MachineInstr &UseMI,
4016  unsigned UseIdx) const {
4017  // No operand latency. The caller may fall back to getInstrLatency.
4018  if (!ItinData || ItinData->isEmpty())
4019  return -1;
4020 
4021  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4022  unsigned Reg = DefMO.getReg();
4023 
4024  const MachineInstr *ResolvedDefMI = &DefMI;
4025  unsigned DefAdj = 0;
4026  if (DefMI.isBundle())
4027  ResolvedDefMI =
4028  getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4029  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4030  ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4031  return 1;
4032  }
4033 
4034  const MachineInstr *ResolvedUseMI = &UseMI;
4035  unsigned UseAdj = 0;
4036  if (UseMI.isBundle()) {
4037  ResolvedUseMI =
4038  getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4039  if (!ResolvedUseMI)
4040  return -1;
4041  }
4042 
4043  return getOperandLatencyImpl(
4044  ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4045  Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4046 }
4047 
4048 int ARMBaseInstrInfo::getOperandLatencyImpl(
4049  const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4050  unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4051  const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4052  unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4053  if (Reg == ARM::CPSR) {
4054  if (DefMI.getOpcode() == ARM::FMSTAT) {
4055  // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4056  return Subtarget.isLikeA9() ? 1 : 20;
4057  }
4058 
4059  // CPSR set and branch can be paired in the same cycle.
4060  if (UseMI.isBranch())
4061  return 0;
4062 
4063  // Otherwise it takes the instruction latency (generally one).
4064  unsigned Latency = getInstrLatency(ItinData, DefMI);
4065 
4066  // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4067  // its uses. Instructions which are otherwise scheduled between them may
4068  // incur a code size penalty (not able to use the CPSR setting 16-bit
4069  // instructions).
4070  if (Latency > 0 && Subtarget.isThumb2()) {
4071  const MachineFunction *MF = DefMI.getParent()->getParent();
4072  // FIXME: Use Function::optForSize().
4073  if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4074  --Latency;
4075  }
4076  return Latency;
4077  }
4078 
4079  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4080  return -1;
4081 
4082  unsigned DefAlign = DefMI.hasOneMemOperand()
4083  ? (*DefMI.memoperands_begin())->getAlignment()
4084  : 0;
4085  unsigned UseAlign = UseMI.hasOneMemOperand()
4086  ? (*UseMI.memoperands_begin())->getAlignment()
4087  : 0;
4088 
4089  // Get the itinerary's latency if possible, and handle variable_ops.
4090  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
4091  UseIdx, UseAlign);
4092  // Unable to find operand latency. The caller may resort to getInstrLatency.
4093  if (Latency < 0)
4094  return Latency;
4095 
4096  // Adjust for IT block position.
4097  int Adj = DefAdj + UseAdj;
4098 
4099  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4100  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4101  if (Adj >= 0 || (int)Latency > -Adj) {
4102  return Latency + Adj;
4103  }
4104  // Return the itinerary latency, which may be zero but not less than zero.
4105  return Latency;
4106 }
4107 
4108 int
4109 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4110  SDNode *DefNode, unsigned DefIdx,
4111  SDNode *UseNode, unsigned UseIdx) const {
4112  if (!DefNode->isMachineOpcode())
4113  return 1;
4114 
4115  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4116 
4117  if (isZeroCost(DefMCID.Opcode))
4118  return 0;
4119 
4120  if (!ItinData || ItinData->isEmpty())
4121  return DefMCID.mayLoad() ? 3 : 1;
4122 
4123  if (!UseNode->isMachineOpcode()) {
4124  int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4125  int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4126  int Threshold = 1 + Adj;
4127  return Latency <= Threshold ? 1 : Latency - Adj;
4128  }
4129 
4130  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4131  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
4132  unsigned DefAlign = !DefMN->memoperands_empty()
4133  ? (*DefMN->memoperands_begin())->getAlignment() : 0;
4134  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
4135  unsigned UseAlign = !UseMN->memoperands_empty()
4136  ? (*UseMN->memoperands_begin())->getAlignment() : 0;
4137  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4138  UseMCID, UseIdx, UseAlign);
4139 
4140  if (Latency > 1 &&
4141  (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4142  Subtarget.isCortexA7())) {
4143  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4144  // variants are one cycle cheaper.
4145  switch (DefMCID.getOpcode()) {
4146  default: break;
4147  case ARM::LDRrs:
4148  case ARM::LDRBrs: {
4149  unsigned ShOpVal =
4150  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4151  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4152  if (ShImm == 0 ||
4153  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4154  --Latency;
4155  break;
4156  }
4157  case ARM::t2LDRs:
4158  case ARM::t2LDRBs:
4159  case ARM::t2LDRHs:
4160  case ARM::t2LDRSHs: {
4161  // Thumb2 mode: lsl only.
4162  unsigned ShAmt =
4163  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4164  if (ShAmt == 0 || ShAmt == 2)
4165  --Latency;
4166  break;
4167  }
4168  }
4169  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4170  // FIXME: Properly handle all of the latency adjustments for address
4171  // writeback.
4172  switch (DefMCID.getOpcode()) {
4173  default: break;
4174  case ARM::LDRrs:
4175  case ARM::LDRBrs: {
4176  unsigned ShOpVal =
4177  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4178  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4179  if (ShImm == 0 ||
4180  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4181  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4182  Latency -= 2;
4183  else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4184  --Latency;
4185  break;
4186  }
4187  case ARM::t2LDRs:
4188  case ARM::t2LDRBs:
4189  case ARM::t2LDRHs:
4190  case ARM::t2LDRSHs:
4191  // Thumb2 mode: lsl 0-3 only.
4192  Latency -= 2;
4193  break;
4194  }
4195  }
4196 
4197  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4198  switch (DefMCID.getOpcode()) {
4199  default: break;
4200  case ARM::VLD1q8:
4201  case ARM::VLD1q16:
4202  case ARM::VLD1q32:
4203  case ARM::VLD1q64:
4204  case ARM::VLD1q8wb_register:
4205  case ARM::VLD1q16wb_register:
4206  case ARM::VLD1q32wb_register:
4207  case ARM::VLD1q64wb_register:
4208  case ARM::VLD1q8wb_fixed:
4209  case ARM::VLD1q16wb_fixed:
4210  case ARM::VLD1q32wb_fixed:
4211  case ARM::VLD1q64wb_fixed:
4212  case ARM::VLD2d8:
4213  case ARM::VLD2d16:
4214  case ARM::VLD2d32:
4215  case ARM::VLD2q8Pseudo:
4216  case ARM::VLD2q16Pseudo:
4217  case ARM::VLD2q32Pseudo:
4218  case ARM::VLD2d8wb_fixed:
4219  case ARM::VLD2d16wb_fixed:
4220  case ARM::VLD2d32wb_fixed:
4221  case ARM::VLD2q8PseudoWB_fixed:
4222  case ARM::VLD2q16PseudoWB_fixed:
4223  case ARM::VLD2q32PseudoWB_fixed:
4224  case ARM::VLD2d8wb_register:
4225  case ARM::VLD2d16wb_register:
4226  case ARM::VLD2d32wb_register:
4227  case ARM::VLD2q8PseudoWB_register:
4228  case ARM::VLD2q16PseudoWB_register:
4229  case ARM::VLD2q32PseudoWB_register:
4230  case ARM::VLD3d8Pseudo:
4231  case ARM::VLD3d16Pseudo:
4232  case ARM::VLD3d32Pseudo:
4233  case ARM::VLD1d64TPseudo:
4234  case ARM::VLD1d64TPseudoWB_fixed:
4235  case ARM::VLD1d64TPseudoWB_register:
4236  case ARM::VLD3d8Pseudo_UPD:
4237  case ARM::VLD3d16Pseudo_UPD:
4238  case ARM::VLD3d32Pseudo_UPD:
4239  case ARM::VLD3q8Pseudo_UPD:
4240  case ARM::VLD3q16Pseudo_UPD:
4241  case ARM::VLD3q32Pseudo_UPD:
4242  case ARM::VLD3q8oddPseudo:
4243  case ARM::VLD3q16oddPseudo:
4244  case ARM::VLD3q32oddPseudo:
4245  case ARM::VLD3q8oddPseudo_UPD:
4246  case ARM::VLD3q16oddPseudo_UPD:
4247  case ARM::VLD3q32oddPseudo_UPD:
4248  case ARM::VLD4d8Pseudo:
4249  case ARM::VLD4d16Pseudo:
4250  case ARM::VLD4d32Pseudo:
4251  case ARM::VLD1d64QPseudo:
4252  case ARM::VLD1d64QPseudoWB_fixed:
4253  case ARM::VLD1d64QPseudoWB_register:
4254  case ARM::VLD4d8Pseudo_UPD:
4255  case ARM::VLD4d16Pseudo_UPD:
4256  case ARM::VLD4d32Pseudo_UPD:
4257  case ARM::VLD4q8Pseudo_UPD:
4258  case ARM::VLD4q16Pseudo_UPD:
4259  case ARM::VLD4q32Pseudo_UPD:
4260  case ARM::VLD4q8oddPseudo:
4261  case ARM::VLD4q16oddPseudo:
4262  case ARM::VLD4q32oddPseudo:
4263  case ARM::VLD4q8oddPseudo_UPD:
4264  case ARM::VLD4q16oddPseudo_UPD:
4265  case ARM::VLD4q32oddPseudo_UPD:
4266  case ARM::VLD1DUPq8:
4267  case ARM::VLD1DUPq16:
4268  case ARM::VLD1DUPq32:
4269  case ARM::VLD1DUPq8wb_fixed:
4270  case ARM::VLD1DUPq16wb_fixed:
4271  case ARM::VLD1DUPq32wb_fixed:
4272  case ARM::VLD1DUPq8wb_register:
4273  case ARM::VLD1DUPq16wb_register:
4274  case ARM::VLD1DUPq32wb_register:
4275  case ARM::VLD2DUPd8:
4276  case ARM::VLD2DUPd16:
4277  case ARM::VLD2DUPd32:
4278  case ARM::VLD2DUPd8wb_fixed:
4279  case ARM::VLD2DUPd16wb_fixed:
4280  case ARM::VLD2DUPd32wb_fixed:
4281  case ARM::VLD2DUPd8wb_register:
4282  case ARM::VLD2DUPd16wb_register:
4283  case ARM::VLD2DUPd32wb_register:
4284  case ARM::VLD4DUPd8Pseudo:
4285  case ARM::VLD4DUPd16Pseudo:
4286  case ARM::VLD4DUPd32Pseudo:
4287  case ARM::VLD4DUPd8Pseudo_UPD:
4288  case ARM::VLD4DUPd16Pseudo_UPD:
4289  case ARM::VLD4DUPd32Pseudo_UPD:
4290  case ARM::VLD1LNq8Pseudo:
4291  case ARM::VLD1LNq16Pseudo:
4292  case ARM::VLD1LNq32Pseudo:
4293  case ARM::VLD1LNq8Pseudo_UPD:
4294  case ARM::VLD1LNq16Pseudo_UPD:
4295  case ARM::VLD1LNq32Pseudo_UPD:
4296  case ARM::VLD2LNd8Pseudo:
4297  case ARM::VLD2LNd16Pseudo:
4298  case ARM::VLD2LNd32Pseudo:
4299  case ARM::VLD2LNq16Pseudo:
4300  case ARM::VLD2LNq32Pseudo:
4301  case ARM::VLD2LNd8Pseudo_UPD:
4302  case ARM::VLD2LNd16Pseudo_UPD:
4303  case ARM::VLD2LNd32Pseudo_UPD:
4304  case ARM::VLD2LNq16Pseudo_UPD:
4305  case ARM::VLD2LNq32Pseudo_UPD:
4306  case ARM::VLD4LNd8Pseudo:
4307  case ARM::VLD4LNd16Pseudo:
4308  case ARM::VLD4LNd32Pseudo:
4309  case ARM::VLD4LNq16Pseudo:
4310  case ARM::VLD4LNq32Pseudo:
4311  case ARM::VLD4LNd8Pseudo_UPD:
4312  case ARM::VLD4LNd16Pseudo_UPD:
4313  case ARM::VLD4LNd32Pseudo_UPD:
4314  case ARM::VLD4LNq16Pseudo_UPD:
4315  case ARM::VLD4LNq32Pseudo_UPD:
4316  // If the address is not 64-bit aligned, the latencies of these
4317  // instructions increase by one.
4318  ++Latency;
4319  break;
4320  }
4321 
4322  return Latency;
4323 }
4324 
4325 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4326  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4327  MI.isImplicitDef())
4328  return 0;
4329 
4330  if (MI.isBundle())
4331  return 0;
4332 
4333  const MCInstrDesc &MCID = MI.getDesc();
4334 
4335  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4336  !Subtarget.cheapPredicableCPSRDef())) {
4337  // When predicated, CPSR is an additional source operand for CPSR updating
4338  // instructions; this apparently increases their latencies.
4339  return 1;
4340  }
4341  return 0;
4342 }
4343 
4344 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4345  const MachineInstr &MI,
4346  unsigned *PredCost) const {
4347  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4348  MI.isImplicitDef())
4349  return 1;
4350 
4351  // An instruction scheduler typically runs on unbundled instructions, however
4352  // other passes may query the latency of a bundled instruction.
4353  if (MI.isBundle()) {
4354  unsigned Latency = 0;
4355  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4356  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4357  while (++I != E && I->isInsideBundle()) {
4358  if (I->getOpcode() != ARM::t2IT)
4359  Latency += getInstrLatency(ItinData, *I, PredCost);
4360  }
4361  return Latency;
4362  }
4363 
4364  const MCInstrDesc &MCID = MI.getDesc();
4365  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4366  !Subtarget.cheapPredicableCPSRDef()))) {
4367  // When predicated, CPSR is an additional source operand for CPSR updating
4368  // instructions; this apparently increases their latencies.
4369  *PredCost = 1;
4370  }
4371  // Be sure to call getStageLatency for an empty itinerary in case it has a
4372  // valid MinLatency property.
4373  if (!ItinData)
4374  return MI.mayLoad() ? 3 : 1;
4375 
4376  unsigned Class = MCID.getSchedClass();
4377 
4378  // For instructions with variable uops, use uops as latency.
4379  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4380  return getNumMicroOps(ItinData, MI);
4381 
4382  // For the common case, fall back on the itinerary's latency.
4383  unsigned Latency = ItinData->getStageLatency(Class);
4384 
4385  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4386  unsigned DefAlign =
4387  MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
4388  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4389  if (Adj >= 0 || (int)Latency > -Adj) {
4390  return Latency + Adj;
4391  }
4392  return Latency;
4393 }
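// ---- Editor's note: illustrative sketch, not part of ARMBaseInstrInfo.cpp ----
// Both getInstrLatency above and getOperandLatencyImpl earlier guard the
// (possibly negative) alignment/shifter adjustment so it can never drive the
// reported latency below zero. The guard in isolation:
static unsigned applyLatencyAdjustment(unsigned Latency, int Adj) {
  // Apply the adjustment only when the result stays non-negative; otherwise
  // keep the unadjusted itinerary latency.
  if (Adj >= 0 || (int)Latency > -Adj)
    return Latency + Adj;
  return Latency;
}
// e.g. applyLatencyAdjustment(3, -2) == 1, but applyLatencyAdjustment(1, -2)
// leaves the latency at 1.
// ---- end editor's note ----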
4394 
4395 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4396  SDNode *Node) const {
4397  if (!Node->isMachineOpcode())
4398  return 1;
4399 
4400  if (!ItinData || ItinData->isEmpty())
4401  return 1;
4402 
4403  unsigned Opcode = Node->getMachineOpcode();
4404  switch (Opcode) {
4405  default:
4406  return ItinData->getStageLatency(get(Opcode).getSchedClass());
4407  case ARM::VLDMQIA:
4408  case ARM::VSTMQIA:
4409  return 2;
4410  }
4411 }
4412 
4413 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4414  const MachineRegisterInfo *MRI,
4415  const MachineInstr &DefMI,
4416  unsigned DefIdx,
4417  const MachineInstr &UseMI,
4418  unsigned UseIdx) const {
4419  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4420  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4421  if (Subtarget.nonpipelinedVFP() &&
4422  (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4423  return true;
4424 
4425  // Hoist VFP / NEON instructions with 4 or higher latency.
4426  unsigned Latency =
4427  SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4428  if (Latency <= 3)
4429  return false;
4430  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4431  UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4432 }
4433 
4434 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4435  const MachineInstr &DefMI,
4436  unsigned DefIdx) const {
4437  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4438  if (!ItinData || ItinData->isEmpty())
4439  return false;
4440 
4441  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4442  if (DDomain == ARMII::DomainGeneral) {
4443  unsigned DefClass = DefMI.getDesc().getSchedClass();
4444  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4445  return (DefCycle != -1 && DefCycle <= 2);
4446  }
4447  return false;
4448 }
4449 
4450 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4451  StringRef &ErrInfo) const {
4452  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4453  ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4454  return false;
4455  }
4456  return true;
4457 }
4458 
4459 // LoadStackGuard has so far only been implemented for MachO. Different code
4460 // sequence is needed for other targets.
4461 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4462  unsigned LoadImmOpc,
4463  unsigned LoadOpc) const {
4464  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4465  "ROPI/RWPI not currently supported with stack guard");
4466 
4467  MachineBasicBlock &MBB = *MI->getParent();
4468  DebugLoc DL = MI->getDebugLoc();
4469  unsigned Reg = MI->getOperand(0).getReg();
4470  const GlobalValue *GV =
4471  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4472  MachineInstrBuilder MIB;
4473 
4474  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4475  .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
4476 
4477  if (Subtarget.isGVIndirectSymbol(GV)) {
4478  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4479  MIB.addReg(Reg, RegState::Kill).addImm(0);
4480  auto Flags = MachineMemOperand::MOLoad |
4481  MachineMemOperand::MODereferenceable |
4482  MachineMemOperand::MOInvariant;
4483  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4484  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
4485  MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4486  }
4487 
4488  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4489  MIB.addReg(Reg, RegState::Kill)
4490  .addImm(0)
4491  .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
4492  .add(predOps(ARMCC::AL));
4493 }
4494 
4495 bool
4496 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4497  unsigned &AddSubOpc,
4498  bool &NegAcc, bool &HasLane) const {
4499  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4500  if (I == MLxEntryMap.end())
4501  return false;
4502 
4503  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4504  MulOpc = Entry.MulOpc;
4505  AddSubOpc = Entry.AddSubOpc;
4506  NegAcc = Entry.NegAcc;
4507  HasLane = Entry.HasLane;
4508  return true;
4509 }
4510 
4511 //===----------------------------------------------------------------------===//
4512 // Execution domains.
4513 //===----------------------------------------------------------------------===//
4514 //
4515 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4516 // and some can go down both. The vmov instructions go down the VFP pipeline,
4517 // but they can be changed to vorr equivalents that are executed by the NEON
4518 // pipeline.
4519 //
4520 // We use the following execution domain numbering:
4521 //
4522 enum ARMExeDomain {
4523  ExeGeneric = 0,
4524  ExeVFP = 1,
4525  ExeNEON = 2
4526 };
4527 
4528 //
4529 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4530 //
4531 std::pair<uint16_t, uint16_t>
4532 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4533  // If we don't have access to NEON instructions then we won't be able
4534  // to swizzle anything to the NEON domain. Check to make sure.
4535  if (Subtarget.hasNEON()) {
4536  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4537  // if they are not predicated.
4538  if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4539  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4540 
4541  // CortexA9 is particularly picky about mixing the two and wants these
4542  // converted.
4543  if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4544  (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4545  MI.getOpcode() == ARM::VMOVS))
4546  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4547  }
4548  // No other instructions can be swizzled, so just determine their domain.
4549  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4550 
4551  if (Domain & ARMII::DomainNEON)
4552  return std::make_pair(ExeNEON, 0);
4553 
4554  // Certain instructions can go either way on Cortex-A8.
4555  // Treat them as NEON instructions.
4556  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4557  return std::make_pair(ExeNEON, 0);
4558 
4559  if (Domain & ARMII::DomainVFP)
4560  return std::make_pair(ExeVFP, 0);
4561 
4562  return std::make_pair(ExeGeneric, 0);
4563 }
4564 
4565 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4566  unsigned SReg, unsigned &Lane) {
4567  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4568  Lane = 0;
4569 
4570  if (DReg != ARM::NoRegister)
4571  return DReg;
4572 
4573  Lane = 1;
4574  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4575 
4576  assert(DReg && "S-register with no D super-register?");
4577  return DReg;
4578 }
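// ---- Editor's note: illustrative sketch, not part of ARMBaseInstrInfo.cpp ----
// The lookup above relies on the VFP/NEON register layout: S(2n) and S(2n+1)
// are the two 32-bit lanes of D(n). Over plain register indices (hypothetical
// helper, not an LLVM API) the mapping is just:
static unsigned sRegIndexToDRegAndLane(unsigned SIndex, unsigned &Lane) {
  Lane = SIndex % 2; // even S-registers are lane 0, odd ones are lane 1
  return SIndex / 2; // index of the containing D-register
}
// e.g. S5 -> D2 lane 1, S8 -> D4 lane 0.
// ---- end editor's note ----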
4579 
4580 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4581 /// set ImplicitSReg to a register number that must be marked as implicit-use or
4582 /// zero if no register needs to be defined as implicit-use.
4583 ///
4584 /// If the function cannot determine if an SPR should be marked implicit use or
4585 /// not, it returns false.
4586 ///
4587 /// This function handles cases where an instruction is being modified from taking
4588 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4589 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4590 /// lane of the DPR).
4591 ///
4592 /// If the other SPR is defined, an implicit-use of it should be added. Else,
4593 /// (including the case where the DPR itself is defined), it should not.
4594 ///
4595 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4596  MachineInstr &MI, unsigned DReg,
4597  unsigned Lane, unsigned &ImplicitSReg) {
4598  // If the DPR is defined or used already, the other SPR lane will be chained
4599  // correctly, so there is nothing to be done.
4600  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4601  ImplicitSReg = 0;
4602  return true;
4603  }
4604 
4605  // Otherwise we need to go searching to see if the SPR is set explicitly.
4606  ImplicitSReg = TRI->getSubReg(DReg,
4607  (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4608  MachineBasicBlock::LivenessQueryResult LQR =
4609  MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4610 
4611  if (LQR == MachineBasicBlock::LQR_Live)
4612  return true;
4613  else if (LQR == MachineBasicBlock::LQR_Unknown)
4614  return false;
4615 
4616  // If the register is known not to be live, there is no need to add an
4617  // implicit-use.
4618  ImplicitSReg = 0;
4619  return true;
4620 }
4621 
4622 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4623  unsigned Domain) const {
4624  unsigned DstReg, SrcReg, DReg;
4625  unsigned Lane;
4626  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4627  const TargetRegisterInfo *TRI = &getRegisterInfo();
4628  switch (MI.getOpcode()) {
4629  default:
4630  llvm_unreachable("cannot handle opcode!");
4631  break;
4632  case ARM::VMOVD:
4633  if (Domain != ExeNEON)
4634  break;
4635 
4636  // Zap the predicate operands.
4637  assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4638 
4639  // Make sure we've got NEON instructions.
4640  assert(Subtarget.hasNEON() && "VORRd requires NEON");
4641 
4642  // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4643  DstReg = MI.getOperand(0).getReg();
4644  SrcReg = MI.getOperand(1).getReg();
4645 
4646  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4647  MI.RemoveOperand(i - 1);
4648 
4649  // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4650  MI.setDesc(get(ARM::VORRd));
4651  MIB.addReg(DstReg, RegState::Define)
4652  .addReg(SrcReg)
4653  .addReg(SrcReg)
4654  .add(predOps(ARMCC::AL));
4655  break;
4656  case ARM::VMOVRS:
4657  if (Domain != ExeNEON)
4658  break;
4659  assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
4660 
4661  // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
4662  DstReg = MI.getOperand(0).getReg();
4663  SrcReg = MI.getOperand(1).getReg();
4664 
4665  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4666  MI.RemoveOperand(i - 1);
4667 
4668  DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
4669 
4670  // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
4671  // Note that DSrc has been widened and the other lane may be undef, which
4672  // contaminates the entire register.
4673  MI.setDesc(get(ARM::VGETLNi32));
4674  MIB.addReg(DstReg, RegState::Define)
4675  .addReg(DReg, RegState::Undef)
4676  .addImm(Lane)
4677  .add(predOps(ARMCC::AL));
4678 
4679  // The old source should be an implicit use, otherwise we might think it
4680  // was dead before here.
4681  MIB.addReg(SrcReg, RegState::Implicit);
4682  break;
4683  case ARM::VMOVSR: {
4684  if (Domain != ExeNEON)
4685  break;
4686  assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
4687 
4688  // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
4689  DstReg = MI.getOperand(0).getReg();
4690  SrcReg = MI.getOperand(1).getReg();
4691 
4692  DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
4693 
4694  unsigned ImplicitSReg;
4695  if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
4696  break;
4697 
4698  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4699  MI.RemoveOperand(i - 1);
4700 
4701  // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
4702  // Again DDst may be undefined at the beginning of this instruction.
4703  MI.setDesc(get(ARM::VSETLNi32));
4704  MIB.addReg(DReg, RegState::Define)
4705  .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
4706  .addReg(SrcReg)
4707  .addImm(Lane)
4708  .add(predOps(ARMCC::AL));
4709 
4710  // The narrower destination must be marked as set to keep previous chains
4711  // in place.
4712  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4713  if (ImplicitSReg != 0)
4714  MIB.addReg(ImplicitSReg, RegState::Implicit);
4715  break;
4716  }
4717  case ARM::VMOVS: {
4718  if (Domain != ExeNEON)
4719  break;
4720 
4721  // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4722  DstReg = MI.getOperand(0).getReg();
4723  SrcReg = MI.getOperand(1).getReg();
4724 
4725  unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4726  DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4727  DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4728 
4729  unsigned ImplicitSReg;
4730  if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4731  break;
4732 
4733  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4734  MI.RemoveOperand(i - 1);
4735 
4736  if (DSrc == DDst) {
4737  // Destination can be:
4738  // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4739  MI.setDesc(get(ARM::VDUPLN32d));
4740  MIB.addReg(DDst, RegState::Define)
4741  .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
4742  .addImm(SrcLane)
4743  .add(predOps(ARMCC::AL));
4744 
4745  // Neither the source nor the destination is naturally represented any
4746  // more, so add them in manually.
4747  MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4748  MIB.addReg(SrcReg, RegState::Implicit);
4749  if (ImplicitSReg != 0)
4750  MIB.addReg(ImplicitSReg, RegState::Implicit);
4751  break;
4752  }
4753 
4754  // In general there's no single instruction that can perform an S <-> S
4755  // move in NEON space, but a pair of VEXT instructions *can* do the
4756  // job. It turns out that the VEXTs needed will only use DSrc once, with
4757  // the position based purely on the combination of lane-0 and lane-1
4758  // involved. For example
4759  // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
4760  // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
4761  // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
4762  // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
4763  //
4764  // Pattern of the MachineInstrs is:
4765  // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4766  MachineInstrBuilder NewMIB;
4767  NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
4768  DDst);
4769 
4770  // On the first instruction, both DSrc and DDst may be undef if present.
4771  // Specifically when the original instruction didn't have them as an
4772  // <imp-use>.
4773  unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
4774  bool CurUndef = !MI.readsRegister(CurReg, TRI);
4775  NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4776 
4777  CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
4778  CurUndef = !MI.readsRegister(CurReg, TRI);
4779  NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
4780  .addImm(1)
4781  .add(predOps(ARMCC::AL));
4782 
4783  if (SrcLane == DstLane)
4784  NewMIB.addReg(SrcReg, RegState::Implicit);
4785 
4786  MI.setDesc(get(ARM::VEXTd32));
4787  MIB.addReg(DDst, RegState::Define);
4788 
4789  // On the second instruction, DDst has definitely been defined above, so
4790  // it is not undef. DSrc, if present, can be undef as above.
4791  CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
4792  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4793  MIB.addReg(CurReg, getUndefRegState(CurUndef));
4794 
4795  CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
4796  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4797  MIB.addReg(CurReg, getUndefRegState(CurUndef))
4798  .addImm(1)
4799  .add(predOps(ARMCC::AL));
4800 
4801  if (SrcLane != DstLane)
4802  MIB.addReg(SrcReg, RegState::Implicit);
4803 
4804  // As before, the original destination is no longer represented, add it
4805  // implicitly.
4806  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4807  if (ImplicitSReg != 0)
4808  MIB.addReg(ImplicitSReg, RegState::Implicit);
4809  break;
4810  }
4811  }
4812 }
4813 
4814 //===----------------------------------------------------------------------===//
4815 // Partial register updates
4816 //===----------------------------------------------------------------------===//
4817 //
4818 // Swift renames NEON registers with 64-bit granularity. That means any
4819 // instruction writing an S-reg implicitly reads the containing D-reg. The
4820 // problem is mostly avoided by translating f32 operations to v2f32 operations
4821 // on D-registers, but f32 loads are still a problem.
4822 //
4823 // These instructions can load an f32 into a NEON register:
4824 //
4825 // VLDRS - Only writes S, partial D update.
4826 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4827 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4828 //
4829 // FCONSTD can be used as a dependency-breaking instruction.
4830 unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
4831  const MachineInstr &MI, unsigned OpNum,
4832  const TargetRegisterInfo *TRI) const {
4833  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
4834  if (!PartialUpdateClearance)
4835  return 0;
4836 
4837  assert(TRI && "Need TRI instance");
4838 
4839  const MachineOperand &MO = MI.getOperand(OpNum);
4840  if (MO.readsReg())
4841  return 0;
4842  unsigned Reg = MO.getReg();
4843  int UseOp = -1;
4844 
4845  switch (MI.getOpcode()) {
4846  // Normal instructions writing only an S-register.
4847  case ARM::VLDRS:
4848  case ARM::FCONSTS:
4849  case ARM::VMOVSR:
4850  case ARM::VMOVv8i8:
4851  case ARM::VMOVv4i16:
4852  case ARM::VMOVv2i32:
4853  case ARM::VMOVv2f32:
4854  case ARM::VMOVv1i64:
4855  UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
4856  break;
4857 
4858  // Explicitly reads the dependency.
4859  case ARM::VLD1LNd32:
4860  UseOp = 3;
4861  break;
4862  default:
4863  return 0;
4864  }
4865 
4866  // If this instruction actually reads a value from Reg, there is no unwanted
4867  // dependency.
4868  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
4869  return 0;
4870 
4871  // We must be able to clobber the whole D-reg.
4872  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4873  // Virtual register must be a def undef foo:ssub_0 operand.
4874  if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
4875  return 0;
4876  } else if (ARM::SPRRegClass.contains(Reg)) {
4877  // Physical register: MI must define the full D-reg.
4878  unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4879  &ARM::DPRRegClass);
4880  if (!DReg || !MI.definesRegister(DReg, TRI))
4881  return 0;
4882  }
4883 
4884  // MI has an unwanted D-register dependency.
4885  // Avoid defs in the previous N instructions.
4886  return PartialUpdateClearance;
4887 }
4888 
4889 // Break a partial register dependency after getPartialRegUpdateClearance
4890 // returned non-zero.
4891 void ARMBaseInstrInfo::breakPartialRegDependency(
4892  MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
4893  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
4894  assert(TRI && "Need TRI instance");
4895 
4896  const MachineOperand &MO = MI.getOperand(OpNum);
4897  unsigned Reg = MO.getReg();
4898  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
4899  "Can't break virtual register dependencies.");
4900  unsigned DReg = Reg;
4901 
4902  // If MI defines an S-reg, find the corresponding D super-register.
4903  if (ARM::SPRRegClass.contains(Reg)) {
4904  DReg = ARM::D0 + (Reg - ARM::S0) / 2;
4905  assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
4906  }
4907 
4908  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
4909  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
4910 
4911  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
4912  // the full D-register by loading the same value to both lanes. The
4913  // instruction is micro-coded with 2 uops, so don't do this until we can
4914  // properly schedule micro-coded instructions. The dispatcher stalls cause
4915  // regressions that are too large.
4916 
4917  // Insert the dependency-breaking FCONSTD before MI.
4918  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
4919  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
4920  .addImm(96)
4921  .add(predOps(ARMCC::AL));
4922  MI.addRegisterKilled(DReg, TRI, true);
4923 }
4924 
4925 bool ARMBaseInstrInfo::hasNOP() const {
4926  return Subtarget.getFeatureBits()[ARM::HasV6KOps];
4927 }
4928 
4929 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
4930  if (MI->getNumOperands() < 4)
4931  return true;
4932  unsigned ShOpVal = MI->getOperand(3).getImm();
4933  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
4934  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
4935  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
4936  ((ShImm == 1 || ShImm == 2) &&
4937  ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
4938  return true;
4939 
4940  return false;
4941 }
4942 
4943 bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
4944  const MachineInstr &MI, unsigned DefIdx,
4945  SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
4946  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4947  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
4948 
4949  switch (MI.getOpcode()) {
4950  case ARM::VMOVDRR:
4951  // dX = VMOVDRR rY, rZ
4952  // is the same as:
4953  // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
4954  // Populate the InputRegs accordingly.
4955  // rY
4956  const MachineOperand *MOReg = &MI.getOperand(1);
4957  if (!MOReg->isUndef())
4958  InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
4959  MOReg->getSubReg(), ARM::ssub_0));
4960  // rZ
4961  MOReg = &MI.getOperand(2);
4962  if (!MOReg->isUndef())
4963  InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
4964  MOReg->getSubReg(), ARM::ssub_1));
4965  return true;
4966  }
4967  llvm_unreachable("Target dependent opcode missing");
4968 }
4969 
4970 bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
4971  const MachineInstr &MI, unsigned DefIdx,
4972  RegSubRegPairAndIdx &InputReg) const {
4973  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4974  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
4975 
4976  switch (MI.getOpcode()) {
4977  case ARM::VMOVRRD:
4978  // rX, rY = VMOVRRD dZ
4979  // is the same as:
4980  // rX = EXTRACT_SUBREG dZ, ssub_0
4981  // rY = EXTRACT_SUBREG dZ, ssub_1
4982  const MachineOperand &MOReg = MI.getOperand(2);
4983  if (MOReg.isUndef())
4984  return false;
4985  InputReg.Reg = MOReg.getReg();
4986  InputReg.SubReg = MOReg.getSubReg();
4987  InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
4988  return true;
4989  }
4990  llvm_unreachable("Target dependent opcode missing");
4991 }
4992 
4993 bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
4994  const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
4995  RegSubRegPairAndIdx &InsertedReg) const {
4996  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4997  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
4998 
4999  switch (MI.getOpcode()) {
5000  case ARM::VSETLNi32:
5001  // dX = VSETLNi32 dY, rZ, imm
5002  const MachineOperand &MOBaseReg = MI.getOperand(1);
5003  const MachineOperand &MOInsertedReg = MI.getOperand(2);
5004  if (MOInsertedReg.isUndef())
5005  return false;
5006  const MachineOperand &MOIndex = MI.getOperand(3);
5007  BaseReg.Reg = MOBaseReg.getReg();
5008  BaseReg.SubReg = MOBaseReg.getSubReg();
5009 
5010  InsertedReg.Reg = MOInsertedReg.getReg();
5011  InsertedReg.SubReg = MOInsertedReg.getSubReg();
5012  InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
5013  return true;
5014  }
5015  llvm_unreachable("Target dependent opcode missing");
5016 }
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool removeKill(MachineInstr &MI)
removeKill - Delete a kill corresponding to the specified machine instruction.
Definition: LiveVariables.h:94
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
void clearKillInfo()
Clears kill flags on all operands.
static bool isCPSRDefined(const MachineInstr &MI)
Can load/store 1 register/cycle.
Definition: ARMSubtarget.h:123
static uint32_t getAlignment(const MCSectionCOFF &Sec)
bool isFpMLxInstruction(unsigned Opcode) const
isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS instruction.
A description of a memory reference used in the backend.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
ShiftOpc getAM2ShiftOpc(unsigned AM2Opc)
unsigned getSOImmValRotate(unsigned Imm)
getSOImmValRotate - Try to handle Imm with an immediate shifter operand, computing the rotate amount ...
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:208
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:314
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void setImplicit(bool Val=true)
static bool isLoad(int Opcode)
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII)
rewriteARMFrameIndex / rewriteT2FrameIndex - Rewrite MI to access &#39;Offset&#39; bytes from the FP...
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:495
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
const MachineInstrBuilder & AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI) const
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const
The memory access is dereferenceable (i.e., doesn&#39;t trap).
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...