LLVM  9.0.0svn
ARMBaseInstrInfo.cpp
1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the Base ARM implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMBaseInstrInfo.h"
14 #include "ARMBaseRegisterInfo.h"
15 #include "ARMConstantPoolValue.h"
16 #include "ARMFeatures.h"
17 #include "ARMHazardRecognizer.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "MCTargetDesc/ARMBaseInfo.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Triple.h"
42 #include "llvm/IR/Attributes.h"
43 #include "llvm/IR/Constants.h"
44 #include "llvm/IR/DebugLoc.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/GlobalValue.h"
47 #include "llvm/MC/MCAsmInfo.h"
48 #include "llvm/MC/MCInstrDesc.h"
51 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Compiler.h"
54 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <new>
63 #include <utility>
64 #include <vector>
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "arm-instrinfo"
69 
70 #define GET_INSTRINFO_CTOR_DTOR
71 #include "ARMGenInstrInfo.inc"
72 
73 static cl::opt<bool>
74 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
75  cl::desc("Enable ARM 2-addr to 3-addr conv"));
76 
77 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
78 struct ARM_MLxEntry {
79  uint16_t MLxOpc; // MLA / MLS opcode
80  uint16_t MulOpc; // Expanded multiplication opcode
81  uint16_t AddSubOpc; // Expanded add / sub opcode
82  bool NegAcc; // True if the acc is negated before the add / sub.
83  bool HasLane; // True if instruction has an extra "lane" operand.
84 };
85 
86 static const ARM_MLxEntry ARM_MLxTable[] = {
87  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
88  // fp scalar ops
89  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
90  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
91  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
92  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
93  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
94  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
95  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
96  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
97 
98  // fp SIMD ops
99  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
100  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
101  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
102  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
103  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
104  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
105  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
106  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
107 };
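// For example, the first entry above says that VMLAS (Sd = Sd + Sn * Sm) may
// be expanded into a VMULS followed by a VADDS. NegAcc marks the variants
// whose accumulator is negated before the add/sub, and HasLane marks the
// by-lane (scalar element) forms.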
108 
109 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
110  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
111  Subtarget(STI) {
112  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
113  if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
114  llvm_unreachable("Duplicated entries?");
115  MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
116  MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
117  }
118 }
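// MLxEntryMap maps each MLA/MLS opcode to its row in ARM_MLxTable, while
// MLxHazardOpcodes records the expanded multiply and add/sub opcodes so the
// ARM hazard recognizer can identify them later.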
119 
120 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfo
121 // currently defaults to no prepass hazard recognizer.
122 ScheduleHazardRecognizer *
123 ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
124  const ScheduleDAG *DAG) const {
125  if (usePreRAHazardRecognizer()) {
126  const InstrItineraryData *II =
127  static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
128  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
129  }
130  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
131 }
132 
133 ScheduleHazardRecognizer *ARMBaseInstrInfo::
134 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
135  const ScheduleDAG *DAG) const {
136  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
137  return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
138  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
139 }
140 
141 MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
142     MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
143  // FIXME: Thumb2 support.
144 
145  if (!EnableARM3Addr)
146  return nullptr;
147 
148  MachineFunction &MF = *MI.getParent()->getParent();
149  uint64_t TSFlags = MI.getDesc().TSFlags;
150  bool isPre = false;
151  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
152  default: return nullptr;
153  case ARMII::IndexModePre:
154  isPre = true;
155  break;
156  case ARMII::IndexModePost:
157  break;
158  }
159 
160  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
161  // operation.
162  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
163  if (MemOpc == 0)
164  return nullptr;
165 
166  MachineInstr *UpdateMI = nullptr;
167  MachineInstr *MemMI = nullptr;
168  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
169  const MCInstrDesc &MCID = MI.getDesc();
170  unsigned NumOps = MCID.getNumOperands();
171  bool isLoad = !MI.mayStore();
172  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
173  const MachineOperand &Base = MI.getOperand(2);
174  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
175  unsigned WBReg = WB.getReg();
176  unsigned BaseReg = Base.getReg();
177  unsigned OffReg = Offset.getReg();
178  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
179  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
180  switch (AddrMode) {
181  default: llvm_unreachable("Unknown indexed op!");
182  case ARMII::AddrMode2: {
183  bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
184  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
185  if (OffReg == 0) {
186  if (ARM_AM::getSOImmVal(Amt) == -1)
187  // Can't encode it in a so_imm operand. This transformation will
188  // add more than 1 instruction. Abandon!
189  return nullptr;
190  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
191  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
192  .addReg(BaseReg)
193  .addImm(Amt)
194  .add(predOps(Pred))
195  .add(condCodeOp());
196  } else if (Amt != 0) {
197  ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
198  unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
199  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
200  get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
201  .addReg(BaseReg)
202  .addReg(OffReg)
203  .addReg(0)
204  .addImm(SOOpc)
205  .add(predOps(Pred))
206  .add(condCodeOp());
207  } else
208  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
209  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
210  .addReg(BaseReg)
211  .addReg(OffReg)
212  .add(predOps(Pred))
213  .add(condCodeOp());
214  break;
215  }
216  case ARMII::AddrMode3 : {
217  bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
218  unsigned Amt = ARM_AM::getAM3Offset(OffImm);
219  if (OffReg == 0)
220  // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
221  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
222  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
223  .addReg(BaseReg)
224  .addImm(Amt)
225  .add(predOps(Pred))
226  .add(condCodeOp());
227  else
228  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
229  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
230  .addReg(BaseReg)
231  .addReg(OffReg)
232  .add(predOps(Pred))
233  .add(condCodeOp());
234  break;
235  }
236  }
237 
238  std::vector<MachineInstr*> NewMIs;
239  if (isPre) {
240  if (isLoad)
241  MemMI =
242  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
243  .addReg(WBReg)
244  .addImm(0)
245  .addImm(Pred);
246  else
247  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
248  .addReg(MI.getOperand(1).getReg())
249  .addReg(WBReg)
250  .addReg(0)
251  .addImm(0)
252  .addImm(Pred);
253  NewMIs.push_back(MemMI);
254  NewMIs.push_back(UpdateMI);
255  } else {
256  if (isLoad)
257  MemMI =
258  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
259  .addReg(BaseReg)
260  .addImm(0)
261  .addImm(Pred);
262  else
263  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
264  .addReg(MI.getOperand(1).getReg())
265  .addReg(BaseReg)
266  .addReg(0)
267  .addImm(0)
268  .addImm(Pred);
269  if (WB.isDead())
270  UpdateMI->getOperand(0).setIsDead();
271  NewMIs.push_back(UpdateMI);
272  NewMIs.push_back(MemMI);
273  }
274 
275  // Transfer LiveVariables states, kill / dead info.
276  if (LV) {
277  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
278  MachineOperand &MO = MI.getOperand(i);
279  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
280  unsigned Reg = MO.getReg();
281 
282  LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
283  if (MO.isDef()) {
284  MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
285  if (MO.isDead())
286  LV->addVirtualRegisterDead(Reg, *NewMI);
287  }
288  if (MO.isUse() && MO.isKill()) {
289  for (unsigned j = 0; j < 2; ++j) {
290  // Look at the two new MI's in reverse order.
291  MachineInstr *NewMI = NewMIs[j];
292  if (!NewMI->readsRegister(Reg))
293  continue;
294  LV->addVirtualRegisterKilled(Reg, *NewMI);
295  if (VI.removeKill(MI))
296  VI.Kills.push_back(NewMI);
297  break;
298  }
299  }
300  }
301  }
302  }
303 
304  MachineBasicBlock::iterator MBBI = MI.getIterator();
305  MFI->insert(MBBI, NewMIs[1]);
306  MFI->insert(MBBI, NewMIs[0]);
307  return NewMIs[0];
308 }
309 
310 // Branch analysis.
311 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
312  MachineBasicBlock *&TBB,
313  MachineBasicBlock *&FBB,
314  SmallVectorImpl<MachineOperand> &Cond,
315  bool AllowModify) const {
316  TBB = nullptr;
317  FBB = nullptr;
318 
319  MachineBasicBlock::iterator I = MBB.end();
320  if (I == MBB.begin())
321  return false; // Empty blocks are easy.
322  --I;
323 
324  // Walk backwards from the end of the basic block until the branch is
325  // analyzed or we give up.
326  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
327  // Flag to be raised on unanalyzable instructions. This is useful in cases
328  // where we want to clean up at the end of the basic block before we bail
329  // out.
330  bool CantAnalyze = false;
331 
332  // Skip over DEBUG values and predicated nonterminators.
333  while (I->isDebugInstr() || !I->isTerminator()) {
334  if (I == MBB.begin())
335  return false;
336  --I;
337  }
338 
339  if (isIndirectBranchOpcode(I->getOpcode()) ||
340  isJumpTableBranchOpcode(I->getOpcode())) {
341  // Indirect branches and jump tables can't be analyzed, but we still want
342  // to clean up any instructions at the tail of the basic block.
343  CantAnalyze = true;
344  } else if (isUncondBranchOpcode(I->getOpcode())) {
345  TBB = I->getOperand(0).getMBB();
346  } else if (isCondBranchOpcode(I->getOpcode())) {
347  // Bail out if we encounter multiple conditional branches.
348  if (!Cond.empty())
349  return true;
350 
351  assert(!FBB && "FBB should have been null.");
352  FBB = TBB;
353  TBB = I->getOperand(0).getMBB();
354  Cond.push_back(I->getOperand(1));
355  Cond.push_back(I->getOperand(2));
356  } else if (I->isReturn()) {
357  // Returns can't be analyzed, but we should run cleanup.
358  CantAnalyze = !isPredicated(*I);
359  } else {
360  // We encountered an unrecognized terminator. Bail out immediately.
361  return true;
362  }
363 
364  // Cleanup code - to be run for unpredicated unconditional branches and
365  // returns.
366  if (!isPredicated(*I) &&
367  (isUncondBranchOpcode(I->getOpcode()) ||
368  isIndirectBranchOpcode(I->getOpcode()) ||
369  isJumpTableBranchOpcode(I->getOpcode()) ||
370  I->isReturn())) {
371  // Forget any previous conditional branch information - it no longer applies.
372  Cond.clear();
373  FBB = nullptr;
374 
375  // If we can modify the function, delete everything below this
376  // unconditional branch.
377  if (AllowModify) {
378  MachineBasicBlock::iterator DI = std::next(I);
379  while (DI != MBB.end()) {
380  MachineInstr &InstToDelete = *DI;
381  ++DI;
382  InstToDelete.eraseFromParent();
383  }
384  }
385  }
386 
387  if (CantAnalyze)
388  return true;
389 
390  if (I == MBB.begin())
391  return false;
392 
393  --I;
394  }
395 
396  // We made it past the terminators without bailing out - we must have
397  // analyzed this branch successfully.
398  return false;
399 }
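// Note on the contract: a false return means the terminators were analyzed.
// TBB then holds the taken destination, FBB the false destination of a
// two-way branch (or null for a fall-through), and Cond the condition-code
// and CPSR operands of a conditional branch (empty if unconditional).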
400 
401 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
402  int *BytesRemoved) const {
403  assert(!BytesRemoved && "code size not handled");
404 
405  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
406  if (I == MBB.end())
407  return 0;
408 
409  if (!isUncondBranchOpcode(I->getOpcode()) &&
410  !isCondBranchOpcode(I->getOpcode()))
411  return 0;
412 
413  // Remove the branch.
414  I->eraseFromParent();
415 
416  I = MBB.end();
417 
418  if (I == MBB.begin()) return 1;
419  --I;
420  if (!isCondBranchOpcode(I->getOpcode()))
421  return 1;
422 
423  // Remove the branch.
424  I->eraseFromParent();
425  return 2;
426 }
427 
428 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
429  MachineBasicBlock *TBB,
430  MachineBasicBlock *FBB,
431  ArrayRef<MachineOperand> Cond,
432  const DebugLoc &DL,
433  int *BytesAdded) const {
434  assert(!BytesAdded && "code size not handled");
435  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
436  int BOpc = !AFI->isThumbFunction()
437  ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
438  int BccOpc = !AFI->isThumbFunction()
439  ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
440  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
441 
442  // Shouldn't be a fall through.
443  assert(TBB && "insertBranch must not be told to insert a fallthrough");
444  assert((Cond.size() == 2 || Cond.size() == 0) &&
445  "ARM branch conditions have two components!");
446 
447  // For conditional branches, we use addOperand to preserve CPSR flags.
448 
449  if (!FBB) {
450  if (Cond.empty()) { // Unconditional branch?
451  if (isThumb)
452  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
453  else
454  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
455  } else
456  BuildMI(&MBB, DL, get(BccOpc))
457  .addMBB(TBB)
458  .addImm(Cond[0].getImm())
459  .add(Cond[1]);
460  return 1;
461  }
462 
463  // Two-way conditional branch.
464  BuildMI(&MBB, DL, get(BccOpc))
465  .addMBB(TBB)
466  .addImm(Cond[0].getImm())
467  .add(Cond[1]);
468  if (isThumb)
469  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
470  else
471  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
472  return 2;
473 }
474 
475 bool ARMBaseInstrInfo::
476 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
477  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
478  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
479  return false;
480 }
481 
482 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
483  if (MI.isBundle()) {
484  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
485  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
486  while (++I != E && I->isInsideBundle()) {
487  int PIdx = I->findFirstPredOperandIdx();
488  if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
489  return true;
490  }
491  return false;
492  }
493 
494  int PIdx = MI.findFirstPredOperandIdx();
495  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
496 }
497 
498 bool ARMBaseInstrInfo::PredicateInstruction(
499     MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
500  unsigned Opc = MI.getOpcode();
501  if (isUncondBranchOpcode(Opc)) {
502  MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
503  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
504  .addImm(Pred[0].getImm())
505  .addReg(Pred[1].getReg());
506  return true;
507  }
508 
509  int PIdx = MI.findFirstPredOperandIdx();
510  if (PIdx != -1) {
511  MachineOperand &PMO = MI.getOperand(PIdx);
512  PMO.setImm(Pred[0].getImm());
513  MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
514  return true;
515  }
516  return false;
517 }
518 
519 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
520  ArrayRef<MachineOperand> Pred2) const {
521  if (Pred1.size() > 2 || Pred2.size() > 2)
522  return false;
523 
524  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
525  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
526  if (CC1 == CC2)
527  return true;
528 
529  switch (CC1) {
530  default:
531  return false;
532  case ARMCC::AL:
533  return true;
534  case ARMCC::HS:
535  return CC2 == ARMCC::HI;
536  case ARMCC::LS:
537  return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
538  case ARMCC::GE:
539  return CC2 == ARMCC::GT;
540  case ARMCC::LE:
541  return CC2 == ARMCC::LT;
542  }
543 }
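// Example: GE subsumes GT, because any flags state satisfying GT also
// satisfies GE; likewise AL subsumes every condition, and identical
// predicates trivially subsume each other.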
544 
545 bool ARMBaseInstrInfo::DefinesPredicate(
546  MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
547  bool Found = false;
548  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
549  const MachineOperand &MO = MI.getOperand(i);
550  if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
551  (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
552  Pred.push_back(MO);
553  Found = true;
554  }
555  }
556 
557  return Found;
558 }
559 
560 bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
561  for (const auto &MO : MI.operands())
562  if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
563  return true;
564  return false;
565 }
566 
568  unsigned Op) const {
569  const MachineOperand &Offset = MI.getOperand(Op + 1);
570  return Offset.getReg() != 0;
571 }
572 
573 // Load with negative register offset requires additional 1cyc and +I unit
574 // for Cortex A57
576  unsigned Op) const {
577  const MachineOperand &Offset = MI.getOperand(Op + 1);
578  const MachineOperand &Opc = MI.getOperand(Op + 2);
579  assert(Opc.isImm());
580  assert(Offset.isReg());
581  int64_t OpcImm = Opc.getImm();
582 
583  bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
584  return (isSub && Offset.getReg() != 0);
585 }
586 
588  unsigned Op) const {
589  const MachineOperand &Opc = MI.getOperand(Op + 2);
590  unsigned OffImm = Opc.getImm();
591  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
592 }
593 
594 // Load, scaled register offset, not plus LSL2
596  unsigned Op) const {
597  const MachineOperand &Opc = MI.getOperand(Op + 2);
598  unsigned OffImm = Opc.getImm();
599 
600  bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
601  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
602  ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
603  if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
604  bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
605  return !SimpleScaled;
606 }
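// In other words, only [Rn, Rm, lsl #2] with an adding offset is treated as
// the simple scaled form; any other shift opcode, shift amount, or a
// subtracting offset makes this predicate return true.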
607 
608 // Minus reg for ldstso addr mode
610  unsigned Op) const {
611  unsigned OffImm = MI.getOperand(Op + 2).getImm();
612  return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
613 }
614 
615 // Load, scaled register offset
617  unsigned Op) const {
618  unsigned OffImm = MI.getOperand(Op + 2).getImm();
619  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
620 }
621 
622 static bool isEligibleForITBlock(const MachineInstr *MI) {
623  switch (MI->getOpcode()) {
624  default: return true;
625  case ARM::tADC: // ADC (register) T1
626  case ARM::tADDi3: // ADD (immediate) T1
627  case ARM::tADDi8: // ADD (immediate) T2
628  case ARM::tADDrr: // ADD (register) T1
629  case ARM::tAND: // AND (register) T1
630  case ARM::tASRri: // ASR (immediate) T1
631  case ARM::tASRrr: // ASR (register) T1
632  case ARM::tBIC: // BIC (register) T1
633  case ARM::tEOR: // EOR (register) T1
634  case ARM::tLSLri: // LSL (immediate) T1
635  case ARM::tLSLrr: // LSL (register) T1
636  case ARM::tLSRri: // LSR (immediate) T1
637  case ARM::tLSRrr: // LSR (register) T1
638  case ARM::tMUL: // MUL T1
639  case ARM::tMVN: // MVN (register) T1
640  case ARM::tORR: // ORR (register) T1
641  case ARM::tROR: // ROR (register) T1
642  case ARM::tRSB: // RSB (immediate) T1
643  case ARM::tSBC: // SBC (register) T1
644  case ARM::tSUBi3: // SUB (immediate) T1
645  case ARM::tSUBi8: // SUB (immediate) T2
646  case ARM::tSUBrr: // SUB (register) T1
647  return !ARMBaseInstrInfo::isCPSRDefined(*MI);
648  }
649 }
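// The Thumb1 opcodes listed above set the flags when executed outside an IT
// block, so an instance is only eligible for an IT block when it does not
// define CPSR.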
650 
651 /// isPredicable - Return true if the specified instruction can be predicated.
652 /// By default, this returns true for every instruction with a
653 /// PredicateOperand.
654 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
655  if (!MI.isPredicable())
656  return false;
657 
658  if (MI.isBundle())
659  return false;
660 
661  if (!isEligibleForITBlock(&MI))
662  return false;
663 
664  const ARMFunctionInfo *AFI =
665  MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
666 
667  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
668  // In their ARM encoding, they can't be encoded in a conditional form.
669  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
670  return false;
671 
672  if (AFI->isThumb2Function()) {
673  if (getSubtarget().restrictIT())
674  return isV8EligibleForIT(&MI);
675  }
676 
677  return true;
678 }
679 
680 namespace llvm {
681 
682 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
683  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
684  const MachineOperand &MO = MI->getOperand(i);
685  if (!MO.isReg() || MO.isUndef() || MO.isUse())
686  continue;
687  if (MO.getReg() != ARM::CPSR)
688  continue;
689  if (!MO.isDead())
690  return false;
691  }
692  // all definitions of CPSR are dead
693  return true;
694 }
695 
696 } // end namespace llvm
697 
698 /// GetInstSize - Return the size of the specified MachineInstr.
699 ///
700 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
701  const MachineBasicBlock &MBB = *MI.getParent();
702  const MachineFunction *MF = MBB.getParent();
703  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
704 
705  const MCInstrDesc &MCID = MI.getDesc();
706  if (MCID.getSize())
707  return MCID.getSize();
708 
709  // If this machine instr is an inline asm, measure it.
710  if (MI.getOpcode() == ARM::INLINEASM) {
711  unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
712  if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
713  Size = alignTo(Size, 4);
714  return Size;
715  }
716  unsigned Opc = MI.getOpcode();
717  switch (Opc) {
718  default:
719  // pseudo-instruction sizes are zero.
720  return 0;
721  case TargetOpcode::BUNDLE:
722  return getInstBundleLength(MI);
723  case ARM::MOVi16_ga_pcrel:
724  case ARM::MOVTi16_ga_pcrel:
725  case ARM::t2MOVi16_ga_pcrel:
726  case ARM::t2MOVTi16_ga_pcrel:
727  return 4;
728  case ARM::MOVi32imm:
729  case ARM::t2MOVi32imm:
730  return 8;
731  case ARM::CONSTPOOL_ENTRY:
732  case ARM::JUMPTABLE_INSTS:
733  case ARM::JUMPTABLE_ADDRS:
734  case ARM::JUMPTABLE_TBB:
735  case ARM::JUMPTABLE_TBH:
736  // If this machine instr is a constant pool entry, its size is recorded as
737  // operand #2.
738  return MI.getOperand(2).getImm();
739  case ARM::Int_eh_sjlj_longjmp:
740  return 16;
741  case ARM::tInt_eh_sjlj_longjmp:
742  return 10;
743  case ARM::tInt_WIN_eh_sjlj_longjmp:
744  return 12;
745  case ARM::Int_eh_sjlj_setjmp:
746  case ARM::Int_eh_sjlj_setjmp_nofp:
747  return 20;
748  case ARM::tInt_eh_sjlj_setjmp:
749  case ARM::t2Int_eh_sjlj_setjmp:
750  case ARM::t2Int_eh_sjlj_setjmp_nofp:
751  return 12;
752  case ARM::SPACE:
753  return MI.getOperand(1).getImm();
754  }
755 }
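// Note: MOVi32imm / t2MOVi32imm report 8 bytes because they are later
// expanded into a movw/movt pair, while the *_ga_pcrel forms are single
// 4-byte instructions; SPACE, CONSTPOOL_ENTRY and the JUMPTABLE variants
// carry their size in an immediate operand.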
756 
757 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
758  unsigned Size = 0;
759  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
760  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
761  while (++I != E && I->isInsideBundle()) {
762  assert(!I->isBundle() && "No nested bundle!");
763  Size += getInstSizeInBytes(*I);
764  }
765  return Size;
766 }
767 
768 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
769  MachineBasicBlock::iterator I,
770  unsigned DestReg, bool KillSrc,
771  const ARMSubtarget &Subtarget) const {
772  unsigned Opc = Subtarget.isThumb()
773  ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
774  : ARM::MRS;
775 
776  MachineInstrBuilder MIB =
777  BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
778 
779  // There is only 1 A/R class MRS instruction, and it always refers to
780  // APSR. However, there are lots of other possibilities on M-class cores.
781  if (Subtarget.isMClass())
782  MIB.addImm(0x800);
783 
784  MIB.add(predOps(ARMCC::AL))
785  .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
786 }
787 
788 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
789  MachineBasicBlock::iterator I,
790  unsigned SrcReg, bool KillSrc,
791  const ARMSubtarget &Subtarget) const {
792  unsigned Opc = Subtarget.isThumb()
793  ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
794  : ARM::MSR;
795 
796  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
797 
798  if (Subtarget.isMClass())
799  MIB.addImm(0x800);
800  else
801  MIB.addImm(8);
802 
803  MIB.addReg(SrcReg, getKillRegState(KillSrc))
804  .add(predOps(ARMCC::AL))
805  .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
806 }
807 
808 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
809  MachineBasicBlock::iterator I,
810  const DebugLoc &DL, unsigned DestReg,
811  unsigned SrcReg, bool KillSrc) const {
812  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
813  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
814 
815  if (GPRDest && GPRSrc) {
816  BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
817  .addReg(SrcReg, getKillRegState(KillSrc))
818  .add(predOps(ARMCC::AL))
819  .add(condCodeOp());
820  return;
821  }
822 
823  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
824  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
825 
826  unsigned Opc = 0;
827  if (SPRDest && SPRSrc)
828  Opc = ARM::VMOVS;
829  else if (GPRDest && SPRSrc)
830  Opc = ARM::VMOVRS;
831  else if (SPRDest && GPRSrc)
832  Opc = ARM::VMOVSR;
833  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
834  Opc = ARM::VMOVD;
835  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
836  Opc = ARM::VORRq;
837 
838  if (Opc) {
839  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
840  MIB.addReg(SrcReg, getKillRegState(KillSrc));
841  if (Opc == ARM::VORRq)
842  MIB.addReg(SrcReg, getKillRegState(KillSrc));
843  MIB.add(predOps(ARMCC::AL));
844  return;
845  }
846 
847  // Handle register classes that require multiple instructions.
848  unsigned BeginIdx = 0;
849  unsigned SubRegs = 0;
850  int Spacing = 1;
851 
852  // Use VORRq when possible.
853  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
854  Opc = ARM::VORRq;
855  BeginIdx = ARM::qsub_0;
856  SubRegs = 2;
857  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
858  Opc = ARM::VORRq;
859  BeginIdx = ARM::qsub_0;
860  SubRegs = 4;
861  // Fall back to VMOVD.
862  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
863  Opc = ARM::VMOVD;
864  BeginIdx = ARM::dsub_0;
865  SubRegs = 2;
866  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
867  Opc = ARM::VMOVD;
868  BeginIdx = ARM::dsub_0;
869  SubRegs = 3;
870  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
871  Opc = ARM::VMOVD;
872  BeginIdx = ARM::dsub_0;
873  SubRegs = 4;
874  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
875  Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
876  BeginIdx = ARM::gsub_0;
877  SubRegs = 2;
878  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
879  Opc = ARM::VMOVD;
880  BeginIdx = ARM::dsub_0;
881  SubRegs = 2;
882  Spacing = 2;
883  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
884  Opc = ARM::VMOVD;
885  BeginIdx = ARM::dsub_0;
886  SubRegs = 3;
887  Spacing = 2;
888  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
889  Opc = ARM::VMOVD;
890  BeginIdx = ARM::dsub_0;
891  SubRegs = 4;
892  Spacing = 2;
893  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
894  Opc = ARM::VMOVS;
895  BeginIdx = ARM::ssub_0;
896  SubRegs = 2;
897  } else if (SrcReg == ARM::CPSR) {
898  copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
899  return;
900  } else if (DestReg == ARM::CPSR) {
901  copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
902  return;
903  }
904 
905  assert(Opc && "Impossible reg-to-reg copy");
906 
907  const TargetRegisterInfo *TRI = &getRegisterInfo();
908  MachineInstrBuilder Mov;
909 
910  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
911  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
912  BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
913  Spacing = -Spacing;
914  }
915 #ifndef NDEBUG
916  SmallSet<unsigned, 4> DstRegs;
917 #endif
918  for (unsigned i = 0; i != SubRegs; ++i) {
919  unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
920  unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
921  assert(Dst && Src && "Bad sub-register");
922 #ifndef NDEBUG
923  assert(!DstRegs.count(Src) && "destructive vector copy");
924  DstRegs.insert(Dst);
925 #endif
926  Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
927  // VORR takes two source operands.
928  if (Opc == ARM::VORRq)
929  Mov.addReg(Src);
930  Mov = Mov.add(predOps(ARMCC::AL));
931  // MOVr can set CC.
932  if (Opc == ARM::MOVr)
933  Mov = Mov.add(condCodeOp());
934  }
935  // Add implicit super-register defs and kills to the last instruction.
936  Mov->addRegisterDefined(DestReg, TRI);
937  if (KillSrc)
938  Mov->addRegisterKilled(SrcReg, TRI);
939 }
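// Example of the tuple path above: copying one QQPR register to another
// emits a VORRq per qsub_0/qsub_1 sub-register, iterating backwards when the
// first destination sub-register overlaps the source so nothing is clobbered
// before it is read.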
940 
942  const MachineOperand *&Src,
943  const MachineOperand *&Dest) const {
944  // VMOVRRD is also a copy instruction, but it requires
945  // special handling and is a more complex form of copy, so
946  // we do not consider it here. For recognition of such
947  // instructions the isExtractSubregLike MI interface function
948  // could be used.
949  // VORRq is considered a move only if its two inputs are
950  // the same register.
951  if (!MI.isMoveReg() ||
952  (MI.getOpcode() == ARM::VORRq &&
953  MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
954  return false;
955  Dest = &MI.getOperand(0);
956  Src = &MI.getOperand(1);
957  return true;
958 }
959 
960 const MachineInstrBuilder &
961 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
962  unsigned SubIdx, unsigned State,
963  const TargetRegisterInfo *TRI) const {
964  if (!SubIdx)
965  return MIB.addReg(Reg, State);
966 
967  if (TargetRegisterInfo::isPhysicalRegister(Reg))
968  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
969  return MIB.addReg(Reg, State, SubIdx);
970 }
971 
972 void ARMBaseInstrInfo::
973 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
974  unsigned SrcReg, bool isKill, int FI,
975  const TargetRegisterClass *RC,
976  const TargetRegisterInfo *TRI) const {
977  MachineFunction &MF = *MBB.getParent();
978  MachineFrameInfo &MFI = MF.getFrameInfo();
979  unsigned Align = MFI.getObjectAlignment(FI);
980 
981  MachineMemOperand *MMO = MF.getMachineMemOperand(
982  MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
983  MFI.getObjectSize(FI), Align);
984 
985  switch (TRI->getSpillSize(*RC)) {
986  case 2:
987  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
988  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
989  .addReg(SrcReg, getKillRegState(isKill))
990  .addFrameIndex(FI)
991  .addImm(0)
992  .addMemOperand(MMO)
993  .add(predOps(ARMCC::AL));
994  } else
995  llvm_unreachable("Unknown reg class!");
996  break;
997  case 4:
998  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
999  BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1000  .addReg(SrcReg, getKillRegState(isKill))
1001  .addFrameIndex(FI)
1002  .addImm(0)
1003  .addMemOperand(MMO)
1004  .add(predOps(ARMCC::AL));
1005  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1006  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1007  .addReg(SrcReg, getKillRegState(isKill))
1008  .addFrameIndex(FI)
1009  .addImm(0)
1010  .addMemOperand(MMO)
1011  .add(predOps(ARMCC::AL));
1012  } else
1013  llvm_unreachable("Unknown reg class!");
1014  break;
1015  case 8:
1016  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1017  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1018  .addReg(SrcReg, getKillRegState(isKill))
1019  .addFrameIndex(FI)
1020  .addImm(0)
1021  .addMemOperand(MMO)
1022  .add(predOps(ARMCC::AL));
1023  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1024  if (Subtarget.hasV5TEOps()) {
1025  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1026  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1027  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1028  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1029  .add(predOps(ARMCC::AL));
1030  } else {
1031  // Fallback to STM instruction, which has existed since the dawn of
1032  // time.
1033  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1034  .addFrameIndex(FI)
1035  .addMemOperand(MMO)
1036  .add(predOps(ARMCC::AL));
1037  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1038  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1039  }
1040  } else
1041  llvm_unreachable("Unknown reg class!");
1042  break;
1043  case 16:
1044  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1045  // Use aligned spills if the stack can be realigned.
1046  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1047  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1048  .addFrameIndex(FI)
1049  .addImm(16)
1050  .addReg(SrcReg, getKillRegState(isKill))
1051  .addMemOperand(MMO)
1052  .add(predOps(ARMCC::AL));
1053  } else {
1054  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1055  .addReg(SrcReg, getKillRegState(isKill))
1056  .addFrameIndex(FI)
1057  .addMemOperand(MMO)
1058  .add(predOps(ARMCC::AL));
1059  }
1060  } else
1061  llvm_unreachable("Unknown reg class!");
1062  break;
1063  case 24:
1064  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1065  // Use aligned spills if the stack can be realigned.
1066  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1067  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1068  .addFrameIndex(FI)
1069  .addImm(16)
1070  .addReg(SrcReg, getKillRegState(isKill))
1071  .addMemOperand(MMO)
1072  .add(predOps(ARMCC::AL));
1073  } else {
1074  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1075  get(ARM::VSTMDIA))
1076  .addFrameIndex(FI)
1077  .add(predOps(ARMCC::AL))
1078  .addMemOperand(MMO);
1079  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1080  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1081  AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1082  }
1083  } else
1084  llvm_unreachable("Unknown reg class!");
1085  break;
1086  case 32:
1087  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1088  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1089  // FIXME: It's possible to only store part of the QQ register if the
1090  // spilled def has a sub-register index.
1091  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1092  .addFrameIndex(FI)
1093  .addImm(16)
1094  .addReg(SrcReg, getKillRegState(isKill))
1095  .addMemOperand(MMO)
1096  .add(predOps(ARMCC::AL));
1097  } else {
1098  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1099  get(ARM::VSTMDIA))
1100  .addFrameIndex(FI)
1101  .add(predOps(ARMCC::AL))
1102  .addMemOperand(MMO);
1103  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1104  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1105  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1106  AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1107  }
1108  } else
1109  llvm_unreachable("Unknown reg class!");
1110  break;
1111  case 64:
1112  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1113  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1114  .addFrameIndex(FI)
1115  .add(predOps(ARMCC::AL))
1116  .addMemOperand(MMO);
1117  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1118  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1119  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1120  MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1121  MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1122  MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1123  MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1124  AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1125  } else
1126  llvm_unreachable("Unknown reg class!");
1127  break;
1128  default:
1129  llvm_unreachable("Unknown reg class!");
1130  }
1131 }
1132 
1133 unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1134  int &FrameIndex) const {
1135  switch (MI.getOpcode()) {
1136  default: break;
1137  case ARM::STRrs:
1138  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1139  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1140  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1141  MI.getOperand(3).getImm() == 0) {
1142  FrameIndex = MI.getOperand(1).getIndex();
1143  return MI.getOperand(0).getReg();
1144  }
1145  break;
1146  case ARM::STRi12:
1147  case ARM::t2STRi12:
1148  case ARM::tSTRspi:
1149  case ARM::VSTRD:
1150  case ARM::VSTRS:
1151  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1152  MI.getOperand(2).getImm() == 0) {
1153  FrameIndex = MI.getOperand(1).getIndex();
1154  return MI.getOperand(0).getReg();
1155  }
1156  break;
1157  case ARM::VST1q64:
1158  case ARM::VST1d64TPseudo:
1159  case ARM::VST1d64QPseudo:
1160  if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1161  FrameIndex = MI.getOperand(0).getIndex();
1162  return MI.getOperand(2).getReg();
1163  }
1164  break;
1165  case ARM::VSTMQIA:
1166  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1167  FrameIndex = MI.getOperand(1).getIndex();
1168  return MI.getOperand(0).getReg();
1169  }
1170  break;
1171  }
1172 
1173  return 0;
1174 }
1175 
1176 bool ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1177  int &FrameIndex) const {
1178  SmallVector<const MachineMemOperand *, 1> Accesses;
1179  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
1180  FrameIndex =
1181  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1182  ->getFrameIndex();
1183  return true;
1184  }
1185  return false;
1186 }
1187 
1188 void ARMBaseInstrInfo::
1189 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1190  unsigned DestReg, int FI,
1191  const TargetRegisterClass *RC,
1192  const TargetRegisterInfo *TRI) const {
1193  DebugLoc DL;
1194  if (I != MBB.end()) DL = I->getDebugLoc();
1195  MachineFunction &MF = *MBB.getParent();
1196  MachineFrameInfo &MFI = MF.getFrameInfo();
1197  unsigned Align = MFI.getObjectAlignment(FI);
1198  MachineMemOperand *MMO = MF.getMachineMemOperand(
1199  MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1200  MFI.getObjectSize(FI), Align);
1201 
1202  switch (TRI->getSpillSize(*RC)) {
1203  case 2:
1204  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1205  BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1206  .addFrameIndex(FI)
1207  .addImm(0)
1208  .addMemOperand(MMO)
1209  .add(predOps(ARMCC::AL));
1210  } else
1211  llvm_unreachable("Unknown reg class!");
1212  break;
1213  case 4:
1214  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1215  BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1216  .addFrameIndex(FI)
1217  .addImm(0)
1218  .addMemOperand(MMO)
1219  .add(predOps(ARMCC::AL));
1220  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1221  BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1222  .addFrameIndex(FI)
1223  .addImm(0)
1224  .addMemOperand(MMO)
1225  .add(predOps(ARMCC::AL));
1226  } else
1227  llvm_unreachable("Unknown reg class!");
1228  break;
1229  case 8:
1230  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1231  BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1232  .addFrameIndex(FI)
1233  .addImm(0)
1234  .addMemOperand(MMO)
1235  .add(predOps(ARMCC::AL));
1236  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1237  MachineInstrBuilder MIB;
1238 
1239  if (Subtarget.hasV5TEOps()) {
1240  MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1241  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1242  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1243  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1244  .add(predOps(ARMCC::AL));
1245  } else {
1246  // Fallback to LDM instruction, which has existed since the dawn of
1247  // time.
1248  MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1249  .addFrameIndex(FI)
1250  .addMemOperand(MMO)
1251  .add(predOps(ARMCC::AL));
1252  MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1253  MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1254  }
1255 
1256  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1257  MIB.addReg(DestReg, RegState::ImplicitDefine);
1258  } else
1259  llvm_unreachable("Unknown reg class!");
1260  break;
1261  case 16:
1262  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1263  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1264  BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1265  .addFrameIndex(FI)
1266  .addImm(16)
1267  .addMemOperand(MMO)
1268  .add(predOps(ARMCC::AL));
1269  } else {
1270  BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1271  .addFrameIndex(FI)
1272  .addMemOperand(MMO)
1273  .add(predOps(ARMCC::AL));
1274  }
1275  } else
1276  llvm_unreachable("Unknown reg class!");
1277  break;
1278  case 24:
1279  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1280  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1281  BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1282  .addFrameIndex(FI)
1283  .addImm(16)
1284  .addMemOperand(MMO)
1285  .add(predOps(ARMCC::AL));
1286  } else {
1287  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1288  .addFrameIndex(FI)
1289  .addMemOperand(MMO)
1290  .add(predOps(ARMCC::AL));
1291  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1292  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1293  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1294  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1295  MIB.addReg(DestReg, RegState::ImplicitDefine);
1296  }
1297  } else
1298  llvm_unreachable("Unknown reg class!");
1299  break;
1300  case 32:
1301  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1302  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1303  BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1304  .addFrameIndex(FI)
1305  .addImm(16)
1306  .addMemOperand(MMO)
1307  .add(predOps(ARMCC::AL));
1308  } else {
1309  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1310  .addFrameIndex(FI)
1311  .add(predOps(ARMCC::AL))
1312  .addMemOperand(MMO);
1313  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1314  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1315  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1316  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1317  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1318  MIB.addReg(DestReg, RegState::ImplicitDefine);
1319  }
1320  } else
1321  llvm_unreachable("Unknown reg class!");
1322  break;
1323  case 64:
1324  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1325  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1326  .addFrameIndex(FI)
1327  .add(predOps(ARMCC::AL))
1328  .addMemOperand(MMO);
1329  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1330  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1331  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1332  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1333  MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1334  MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1335  MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1336  MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1337  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1338  MIB.addReg(DestReg, RegState::ImplicitDefine);
1339  } else
1340  llvm_unreachable("Unknown reg class!");
1341  break;
1342  default:
1343  llvm_unreachable("Unknown regclass!");
1344  }
1345 }
1346 
1347 unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1348  int &FrameIndex) const {
1349  switch (MI.getOpcode()) {
1350  default: break;
1351  case ARM::LDRrs:
1352  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1353  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1354  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1355  MI.getOperand(3).getImm() == 0) {
1356  FrameIndex = MI.getOperand(1).getIndex();
1357  return MI.getOperand(0).getReg();
1358  }
1359  break;
1360  case ARM::LDRi12:
1361  case ARM::t2LDRi12:
1362  case ARM::tLDRspi:
1363  case ARM::VLDRD:
1364  case ARM::VLDRS:
1365  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1366  MI.getOperand(2).getImm() == 0) {
1367  FrameIndex = MI.getOperand(1).getIndex();
1368  return MI.getOperand(0).getReg();
1369  }
1370  break;
1371  case ARM::VLD1q64:
1372  case ARM::VLD1d8TPseudo:
1373  case ARM::VLD1d16TPseudo:
1374  case ARM::VLD1d32TPseudo:
1375  case ARM::VLD1d64TPseudo:
1376  case ARM::VLD1d8QPseudo:
1377  case ARM::VLD1d16QPseudo:
1378  case ARM::VLD1d32QPseudo:
1379  case ARM::VLD1d64QPseudo:
1380  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1381  FrameIndex = MI.getOperand(1).getIndex();
1382  return MI.getOperand(0).getReg();
1383  }
1384  break;
1385  case ARM::VLDMQIA:
1386  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1387  FrameIndex = MI.getOperand(1).getIndex();
1388  return MI.getOperand(0).getReg();
1389  }
1390  break;
1391  }
1392 
1393  return 0;
1394 }
1395 
1396 bool ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1397  int &FrameIndex) const {
1398  SmallVector<const MachineMemOperand *, 1> Accesses;
1399  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
1400  FrameIndex =
1401  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1402  ->getFrameIndex();
1403  return true;
1404  }
1405  return false;
1406 }
1407 
1408 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1409 /// depending on whether the result is used.
1410 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1411  bool isThumb1 = Subtarget.isThumb1Only();
1412  bool isThumb2 = Subtarget.isThumb2();
1413  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1414 
1415  DebugLoc dl = MI->getDebugLoc();
1416  MachineBasicBlock *BB = MI->getParent();
1417 
1418  MachineInstrBuilder LDM, STM;
1419  if (isThumb1 || !MI->getOperand(1).isDead()) {
1420  MachineOperand LDWb(MI->getOperand(1));
1421  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1422  : isThumb1 ? ARM::tLDMIA_UPD
1423  : ARM::LDMIA_UPD))
1424  .add(LDWb);
1425  } else {
1426  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1427  }
1428 
1429  if (isThumb1 || !MI->getOperand(0).isDead()) {
1430  MachineOperand STWb(MI->getOperand(0));
1431  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1432  : isThumb1 ? ARM::tSTMIA_UPD
1433  : ARM::STMIA_UPD))
1434  .add(STWb);
1435  } else {
1436  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1437  }
1438 
1439  MachineOperand LDBase(MI->getOperand(3));
1440  LDM.add(LDBase).add(predOps(ARMCC::AL));
1441 
1442  MachineOperand STBase(MI->getOperand(2));
1443  STM.add(STBase).add(predOps(ARMCC::AL));
1444 
1445  // Sort the scratch registers into ascending order.
1446  const TargetRegisterInfo &TRI = getRegisterInfo();
1447  SmallVector<unsigned, 6> ScratchRegs;
1448  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1449  ScratchRegs.push_back(MI->getOperand(I).getReg());
1450  llvm::sort(ScratchRegs,
1451  [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1452  return TRI.getEncodingValue(Reg1) <
1453  TRI.getEncodingValue(Reg2);
1454  });
1455 
1456  for (const auto &Reg : ScratchRegs) {
1457  LDM.addReg(Reg, RegState::Define);
1458  STM.addReg(Reg, RegState::Kill);
1459  }
1460 
1461  BB->erase(MI);
1462 }
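// The _UPD (writeback) forms are used whenever the base-register update
// defined by the MEMCPY pseudo is still live (and always on Thumb1);
// otherwise the plain LDMIA/STMIA forms are emitted and the writeback defs
// are dropped.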
1463 
1464 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1465  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1466  assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
1467  "LOAD_STACK_GUARD currently supported only for MachO.");
1468  expandLoadStackGuard(MI);
1469  MI.getParent()->erase(MI);
1470  return true;
1471  }
1472 
1473  if (MI.getOpcode() == ARM::MEMCPY) {
1474  expandMEMCPY(MI);
1475  return true;
1476  }
1477 
1478  // This hook gets to expand COPY instructions before they become
1479  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1480  // widened to VMOVD. We prefer the VMOVD when possible because it may be
1481  // changed into a VORR that can go down the NEON pipeline.
1482  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
1483  return false;
1484 
1485  // Look for a copy between even S-registers. That is where we keep floats
1486  // when using NEON v2f32 instructions for f32 arithmetic.
1487  unsigned DstRegS = MI.getOperand(0).getReg();
1488  unsigned SrcRegS = MI.getOperand(1).getReg();
1489  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1490  return false;
1491 
1492  const TargetRegisterInfo *TRI = &getRegisterInfo();
1493  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1494  &ARM::DPRRegClass);
1495  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1496  &ARM::DPRRegClass);
1497  if (!DstRegD || !SrcRegD)
1498  return false;
1499 
1500  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1501  // legal if the COPY already defines the full DstRegD, and it isn't a
1502  // sub-register insertion.
1503  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1504  return false;
1505 
1506  // A dead copy shouldn't show up here, but reject it just in case.
1507  if (MI.getOperand(0).isDead())
1508  return false;
1509 
1510  // All clear, widen the COPY.
1511  LLVM_DEBUG(dbgs() << "widening: " << MI);
1512  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1513 
1514  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1515  // or some other super-register.
1516  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1517  if (ImpDefIdx != -1)
1518  MI.RemoveOperand(ImpDefIdx);
1519 
1520  // Change the opcode and operands.
1521  MI.setDesc(get(ARM::VMOVD));
1522  MI.getOperand(0).setReg(DstRegD);
1523  MI.getOperand(1).setReg(SrcRegD);
1524  MIB.add(predOps(ARMCC::AL));
1525 
1526  // We are now reading SrcRegD instead of SrcRegS. This may upset the
1527  // register scavenger and machine verifier, so we need to indicate that we
1528  // are reading an undefined value from SrcRegD, but a proper value from
1529  // SrcRegS.
1530  MI.getOperand(1).setIsUndef();
1531  MIB.addReg(SrcRegS, RegState::Implicit);
1532 
1533  // SrcRegD may actually contain an unrelated value in the ssub_1
1534  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1535  if (MI.getOperand(1).isKill()) {
1536  MI.getOperand(1).setIsKill(false);
1537  MI.addRegisterKilled(SrcRegS, TRI, true);
1538  }
1539 
1540  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1541  return true;
1542 }
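// For example, a COPY of %s0 into %s2 is rewritten as VMOVD %d1 = %d0, with
// %d0 marked undef and %s0 added back as an implicit use so the register
// scavenger and verifier still see a well-defined read of the source.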
1543 
1544 /// Create a copy of a const pool value. Update CPI to the new index and return
1545 /// the label UID.
1546 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1547  MachineConstantPool *MCP = MF.getConstantPool();
1548  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1549 
1550  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1551  assert(MCPE.isMachineConstantPoolEntry() &&
1552  "Expecting a machine constantpool entry!");
1553  ARMConstantPoolValue *ACPV =
1554  static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1555 
1556  unsigned PCLabelId = AFI->createPICLabelUId();
1557  ARMConstantPoolValue *NewCPV = nullptr;
1558 
1559  // FIXME: The below assumes PIC relocation model and that the function
1560  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1561  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1562  // instructions, so that's probably OK, but is PIC always correct when
1563  // we get here?
1564  if (ACPV->isGlobalValue())
1565  NewCPV = ARMConstantPoolConstant::Create(
1566  cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1567  4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1568  else if (ACPV->isExtSymbol())
1569  NewCPV = ARMConstantPoolSymbol::
1570  Create(MF.getFunction().getContext(),
1571  cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1572  else if (ACPV->isBlockAddress())
1573  NewCPV = ARMConstantPoolConstant::
1574  Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1575  ARMCP::CPBlockAddress, 4);
1576  else if (ACPV->isLSDA())
1577  NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1578  ARMCP::CPLSDA, 4);
1579  else if (ACPV->isMachineBasicBlock())
1580  NewCPV = ARMConstantPoolMBB::
1581  Create(MF.getFunction().getContext(),
1582  cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1583  else
1584  llvm_unreachable("Unexpected ARM constantpool value type!!");
1585  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
1586  return PCLabelId;
1587 }
1588 
1589 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1590  MachineBasicBlock::iterator I,
1591  unsigned DestReg, unsigned SubIdx,
1592  const MachineInstr &Orig,
1593  const TargetRegisterInfo &TRI) const {
1594  unsigned Opcode = Orig.getOpcode();
1595  switch (Opcode) {
1596  default: {
1597  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1598  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1599  MBB.insert(I, MI);
1600  break;
1601  }
1602  case ARM::tLDRpci_pic:
1603  case ARM::t2LDRpci_pic: {
1604  MachineFunction &MF = *MBB.getParent();
1605  unsigned CPI = Orig.getOperand(1).getIndex();
1606  unsigned PCLabelId = duplicateCPV(MF, CPI);
1607  BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1608  .addConstantPoolIndex(CPI)
1609  .addImm(PCLabelId)
1610  .cloneMemRefs(Orig);
1611  break;
1612  }
1613  }
1614 }
1615 
1616 MachineInstr &
1617 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1618  MachineBasicBlock::iterator InsertBefore,
1619  const MachineInstr &Orig) const {
1620  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1621  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1622  for (;;) {
1623  switch (I->getOpcode()) {
1624  case ARM::tLDRpci_pic:
1625  case ARM::t2LDRpci_pic: {
1626  MachineFunction &MF = *MBB.getParent();
1627  unsigned CPI = I->getOperand(1).getIndex();
1628  unsigned PCLabelId = duplicateCPV(MF, CPI);
1629  I->getOperand(1).setIndex(CPI);
1630  I->getOperand(2).setImm(PCLabelId);
1631  break;
1632  }
1633  }
1634  if (!I->isBundledWithSucc())
1635  break;
1636  ++I;
1637  }
1638  return Cloned;
1639 }
1640 
1641 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1642  const MachineInstr &MI1,
1643  const MachineRegisterInfo *MRI) const {
1644  unsigned Opcode = MI0.getOpcode();
1645  if (Opcode == ARM::t2LDRpci ||
1646  Opcode == ARM::t2LDRpci_pic ||
1647  Opcode == ARM::tLDRpci ||
1648  Opcode == ARM::tLDRpci_pic ||
1649  Opcode == ARM::LDRLIT_ga_pcrel ||
1650  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1651  Opcode == ARM::tLDRLIT_ga_pcrel ||
1652  Opcode == ARM::MOV_ga_pcrel ||
1653  Opcode == ARM::MOV_ga_pcrel_ldr ||
1654  Opcode == ARM::t2MOV_ga_pcrel) {
1655  if (MI1.getOpcode() != Opcode)
1656  return false;
1657  if (MI0.getNumOperands() != MI1.getNumOperands())
1658  return false;
1659 
1660  const MachineOperand &MO0 = MI0.getOperand(1);
1661  const MachineOperand &MO1 = MI1.getOperand(1);
1662  if (MO0.getOffset() != MO1.getOffset())
1663  return false;
1664 
1665  if (Opcode == ARM::LDRLIT_ga_pcrel ||
1666  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1667  Opcode == ARM::tLDRLIT_ga_pcrel ||
1668  Opcode == ARM::MOV_ga_pcrel ||
1669  Opcode == ARM::MOV_ga_pcrel_ldr ||
1670  Opcode == ARM::t2MOV_ga_pcrel)
1671  // Ignore the PC labels.
1672  return MO0.getGlobal() == MO1.getGlobal();
1673 
1674  const MachineFunction *MF = MI0.getParent()->getParent();
1675  const MachineConstantPool *MCP = MF->getConstantPool();
1676  int CPI0 = MO0.getIndex();
1677  int CPI1 = MO1.getIndex();
1678  const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1679  const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1680  bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1681  bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1682  if (isARMCP0 && isARMCP1) {
1683  ARMConstantPoolValue *ACPV0 =
1684  static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1685  ARMConstantPoolValue *ACPV1 =
1686  static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1687  return ACPV0->hasSameValue(ACPV1);
1688  } else if (!isARMCP0 && !isARMCP1) {
1689  return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1690  }
1691  return false;
1692  } else if (Opcode == ARM::PICLDR) {
1693  if (MI1.getOpcode() != Opcode)
1694  return false;
1695  if (MI0.getNumOperands() != MI1.getNumOperands())
1696  return false;
1697 
1698  unsigned Addr0 = MI0.getOperand(1).getReg();
1699  unsigned Addr1 = MI1.getOperand(1).getReg();
1700  if (Addr0 != Addr1) {
1701  if (!MRI ||
1702  !TargetRegisterInfo::isVirtualRegister(Addr0) ||
1703  !TargetRegisterInfo::isVirtualRegister(Addr1))
1704  return false;
1705 
1706  // This assumes SSA form.
1707  MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1708  MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1709  // Check if the loaded value, e.g. a constantpool of a global address, are
1710  // the same.
1711  if (!produceSameValue(*Def0, *Def1, MRI))
1712  return false;
1713  }
1714 
1715  for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1716  // %12 = PICLDR %11, 0, 14, %noreg
1717  const MachineOperand &MO0 = MI0.getOperand(i);
1718  const MachineOperand &MO1 = MI1.getOperand(i);
1719  if (!MO0.isIdenticalTo(MO1))
1720  return false;
1721  }
1722  return true;
1723  }
1724 
1725  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1726 }
1727 
1728 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1729 /// determine if two loads are loading from the same base address. It should
1730 /// only return true if the base pointers are the same and the only difference
1731 /// between the two addresses is the offset. It also returns the offsets by
1732 /// reference.
1733 ///
1734 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1735 /// is permanently disabled.
1736 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1737  int64_t &Offset1,
1738  int64_t &Offset2) const {
1739  // Don't worry about Thumb: just ARM and Thumb2.
1740  if (Subtarget.isThumb1Only()) return false;
1741 
1742  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1743  return false;
1744 
1745  switch (Load1->getMachineOpcode()) {
1746  default:
1747  return false;
1748  case ARM::LDRi12:
1749  case ARM::LDRBi12:
1750  case ARM::LDRD:
1751  case ARM::LDRH:
1752  case ARM::LDRSB:
1753  case ARM::LDRSH:
1754  case ARM::VLDRD:
1755  case ARM::VLDRS:
1756  case ARM::t2LDRi8:
1757  case ARM::t2LDRBi8:
1758  case ARM::t2LDRDi8:
1759  case ARM::t2LDRSHi8:
1760  case ARM::t2LDRi12:
1761  case ARM::t2LDRBi12:
1762  case ARM::t2LDRSHi12:
1763  break;
1764  }
1765 
1766  switch (Load2->getMachineOpcode()) {
1767  default:
1768  return false;
1769  case ARM::LDRi12:
1770  case ARM::LDRBi12:
1771  case ARM::LDRD:
1772  case ARM::LDRH:
1773  case ARM::LDRSB:
1774  case ARM::LDRSH:
1775  case ARM::VLDRD:
1776  case ARM::VLDRS:
1777  case ARM::t2LDRi8:
1778  case ARM::t2LDRBi8:
1779  case ARM::t2LDRSHi8:
1780  case ARM::t2LDRi12:
1781  case ARM::t2LDRBi12:
1782  case ARM::t2LDRSHi12:
1783  break;
1784  }
1785 
1786  // Check if base addresses and chain operands match.
1787  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1788  Load1->getOperand(4) != Load2->getOperand(4))
1789  return false;
1790 
1791  // Index should be Reg0.
1792  if (Load1->getOperand(3) != Load2->getOperand(3))
1793  return false;
1794 
1795  // Determine the offsets.
1796  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1797  isa<ConstantSDNode>(Load2->getOperand(1))) {
1798  Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1799  Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1800  return true;
1801  }
1802 
1803  return false;
1804 }
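// Illustrative example (not part of the original source): for a pair of
// LDRi12 selection-DAG nodes that read from the same base register with
// immediate offsets 0 and 4, matching chain operands and zero index operands,
// the hook above returns true with Offset1 = 0 and Offset2 = 4, letting the
// pre-RA scheduler treat them as neighbouring accesses.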
1805 
1806 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1807 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1808 /// be scheduled together. On some targets, if two loads are loading from
1809 /// addresses in the same cache line, it's better if they are scheduled
1810 /// together. This function takes two integers that represent the load offsets
1811 /// from the common base address. It returns true if it decides it's desirable
1812 /// to schedule the two loads together. "NumLoads" is the number of loads that
1813 /// have already been scheduled after Load1.
1814 ///
1815 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1816 /// is permanently disabled.
1817 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1818  int64_t Offset1, int64_t Offset2,
1819  unsigned NumLoads) const {
1820  // Don't worry about Thumb: just ARM and Thumb2.
1821  if (Subtarget.isThumb1Only()) return false;
1822 
1823  assert(Offset2 > Offset1);
1824 
1825  if ((Offset2 - Offset1) / 8 > 64)
1826  return false;
1827 
1828  // Check if the machine opcodes are different. If they are different
1829  // then we conservatively assume they do not share the same base address,
1830  // EXCEPT in the case of Thumb2 byte loads where one is t2LDRBi8 and the
1831  // other is t2LDRBi12. In this case they are considered the same because
1832  // they are simply different encoding forms of the same basic instruction.
1833  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1834  !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1835  Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1836  (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1837  Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1838  return false; // FIXME: overly conservative?
1839 
1840  // Four loads in a row should be sufficient.
1841  if (NumLoads >= 3)
1842  return false;
1843 
1844  return true;
1845 }
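// Worked example of the heuristic above (illustrative only): with Offset1 = 0
// and Offset2 = 16, (Offset2 - Offset1) / 8 == 2, well under the limit of 64,
// so two loads of the same opcode are clustered as long as fewer than three
// loads have already been scheduled after Load1, keeping clusters to at most
// four loads in a row.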
1846 
1847 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1848  const MachineBasicBlock *MBB,
1849  const MachineFunction &MF) const {
1850  // Debug info is never a scheduling boundary. It's necessary to be explicit
1851  // due to the special treatment of IT instructions below, otherwise a
1852  // dbg_value followed by an IT will result in the IT instruction being
1853  // considered a scheduling hazard, which is wrong. It should be the actual
1854  // instruction preceding the dbg_value instruction(s), just like it is
1855  // when debug info is not present.
1856  if (MI.isDebugInstr())
1857  return false;
1858 
1859  // Terminators and labels can't be scheduled around.
1860  if (MI.isTerminator() || MI.isPosition())
1861  return true;
1862 
1863  // Treat the start of the IT block as a scheduling boundary, but schedule
1864  // t2IT along with all instructions following it.
1865  // FIXME: This is a big hammer. But the alternative is to add all potential
1866  // true and anti dependencies to IT block instructions as implicit operands
1867  // to the t2IT instruction. The added compile time and complexity does not
1868  // seem worth it.
1869  MachineBasicBlock::const_iterator I = MI;
1870  // Make sure to skip any debug instructions
1871  while (++I != MBB->end() && I->isDebugInstr())
1872  ;
1873  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1874  return true;
1875 
1876  // Don't attempt to schedule around any instruction that defines
1877  // a stack-oriented pointer, as it's unlikely to be profitable. This
1878  // saves compile time, because it doesn't require every single
1879  // stack slot reference to depend on the instruction that does the
1880  // modification.
1881  // Calls don't actually change the stack pointer, even if they have imp-defs.
1882  // No ARM calling conventions change the stack pointer. (X86 calling
1883  // conventions sometimes do).
1884  if (!MI.isCall() && MI.definesRegister(ARM::SP))
1885  return true;
1886 
1887  return false;
1888 }
1889 
1890 bool ARMBaseInstrInfo::
1891 isProfitableToIfCvt(MachineBasicBlock &MBB,
1892  unsigned NumCycles, unsigned ExtraPredCycles,
1893  BranchProbability Probability) const {
1894  if (!NumCycles)
1895  return false;
1896 
1897  // If we are optimizing for size, see if the branch in the predecessor can be
1898  // lowered to cbn?z by the constant island lowering pass, and return false if
1899  // so. This results in a shorter instruction sequence.
1900  if (MBB.getParent()->getFunction().optForSize()) {
1901  MachineBasicBlock *Pred = *MBB.pred_begin();
1902  if (!Pred->empty()) {
1903  MachineInstr *LastMI = &*Pred->rbegin();
1904  if (LastMI->getOpcode() == ARM::t2Bcc) {
1905  MachineBasicBlock::iterator CmpMI = LastMI;
1906  if (CmpMI != Pred->begin()) {
1907  --CmpMI;
1908  if (CmpMI->getOpcode() == ARM::tCMPi8 ||
1909  CmpMI->getOpcode() == ARM::t2CMPri) {
1910  unsigned Reg = CmpMI->getOperand(0).getReg();
1911  unsigned PredReg = 0;
1912  ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
1913  if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
1914  isARMLowRegister(Reg))
1915  return false;
1916  }
1917  }
1918  }
1919  }
1920  }
1921  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1922  MBB, 0, 0, Probability);
1923 }
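// Illustrative case for the size optimisation above (not from the original
// source): if the predecessor ends with an unpredicated tCMPi8 of a low
// register against 0 followed by a t2Bcc, the constant-island pass can later
// fuse them into a single CBZ/CBNZ, so if-conversion is rejected here to keep
// that shorter sequence available.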
1924 
1925 bool ARMBaseInstrInfo::
1926 isProfitableToIfCvt(MachineBasicBlock &TBB,
1927  unsigned TCycles, unsigned TExtra,
1928  MachineBasicBlock &FBB,
1929  unsigned FCycles, unsigned FExtra,
1930  BranchProbability Probability) const {
1931  if (!TCycles)
1932  return false;
1933 
1934  // Attempt to estimate the relative costs of predication versus branching.
1935  // Here we scale up each component of UnpredCost to avoid precision issues when
1936  // scaling TCycles/FCycles by Probability.
1937  const unsigned ScalingUpFactor = 1024;
1938 
1939  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1940  unsigned UnpredCost;
1941  if (!Subtarget.hasBranchPredictor()) {
1942  // When we don't have a branch predictor it's always cheaper to not take a
1943  // branch than take it, so we have to take that into account.
1944  unsigned NotTakenBranchCost = 1;
1945  unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1946  unsigned TUnpredCycles, FUnpredCycles;
1947  if (!FCycles) {
1948  // Triangle: TBB is the fallthrough
1949  TUnpredCycles = TCycles + NotTakenBranchCost;
1950  FUnpredCycles = TakenBranchCost;
1951  } else {
1952  // Diamond: TBB is the block that is branched to, FBB is the fallthrough
1953  TUnpredCycles = TCycles + TakenBranchCost;
1954  FUnpredCycles = FCycles + NotTakenBranchCost;
1955  // The branch at the end of FBB will disappear when it's predicated, so
1956  // discount it from PredCost.
1957  PredCost -= 1 * ScalingUpFactor;
1958  }
1959  // The total cost is the cost of each path scaled by their probabilities.
1960  unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
1961  unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
1962  UnpredCost = TUnpredCost + FUnpredCost;
1963  // When predicating, assume that the first IT can be folded away but later
1964  // ones cost one cycle each.
1965  if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
1966  PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
1967  }
1968  } else {
1969  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
1970  unsigned FUnpredCost =
1971  Probability.getCompl().scale(FCycles * ScalingUpFactor);
1972  UnpredCost = TUnpredCost + FUnpredCost;
1973  UnpredCost += 1 * ScalingUpFactor; // The branch itself
1974  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
1975  }
1976 
1977  return PredCost <= UnpredCost;
1978 }
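// Worked example for the cost model above (illustrative numbers only, assuming
// a core with a branch predictor and a misprediction penalty of 8 cycles):
// with TCycles = FCycles = 1, no extra predication cycles and a 50% branch
// probability,
//   PredCost   = (1 + 1) * 1024                        = 2048
//   UnpredCost = 512 + 512 + 1024 (the branch itself)
//                + 8 * 1024 / 10 (misprediction share) = 2867
// so PredCost <= UnpredCost and if-conversion is deemed profitable.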
1979 
1980 bool
1981 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1982  MachineBasicBlock &FMBB) const {
1983  // Reduce false anti-dependencies to let the target's out-of-order execution
1984  // engine do its thing.
1985  return Subtarget.isProfitableToUnpredicate();
1986 }
1987 
1988 /// getInstrPredicate - If instruction is predicated, returns its predicate
1989 /// condition, otherwise returns AL. It also returns the condition code
1990 /// register by reference.
1991 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
1992  unsigned &PredReg) {
1993  int PIdx = MI.findFirstPredOperandIdx();
1994  if (PIdx == -1) {
1995  PredReg = 0;
1996  return ARMCC::AL;
1997  }
1998 
1999  PredReg = MI.getOperand(PIdx+1).getReg();
2000  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2001 }
2002 
2003 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2004  if (Opc == ARM::B)
2005  return ARM::Bcc;
2006  if (Opc == ARM::tB)
2007  return ARM::tBcc;
2008  if (Opc == ARM::t2B)
2009  return ARM::t2Bcc;
2010 
2011  llvm_unreachable("Unknown unconditional branch opcode!");
2012 }
2013 
2014 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2015  bool NewMI,
2016  unsigned OpIdx1,
2017  unsigned OpIdx2) const {
2018  switch (MI.getOpcode()) {
2019  case ARM::MOVCCr:
2020  case ARM::t2MOVCCr: {
2021  // MOVCC can be commuted by inverting the condition.
2022  unsigned PredReg = 0;
2023  ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2024  // MOVCC AL can't be inverted. Shouldn't happen.
2025  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2026  return nullptr;
2027  MachineInstr *CommutedMI =
2028  TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2029  if (!CommutedMI)
2030  return nullptr;
2031  // After swapping the MOVCC operands, also invert the condition.
2032  CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2033  .setImm(ARMCC::getOppositeCondition(CC));
2034  return CommutedMI;
2035  }
2036  }
2037  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2038 }
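// Illustrative example (not from the original source): commuting a MOVCCr
// such as
//   %2:gpr = MOVCCr %0, %1, 0 /* eq */, %cpsr
// swaps the two source operands and then inverts the condition, yielding
//   %2:gpr = MOVCCr %1, %0, 1 /* ne */, %cpsr
// which selects the same value for either CPSR state.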
2039 
2040 /// Identify instructions that can be folded into a MOVCC instruction, and
2041 /// return the defining instruction.
2042 static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
2043  const MachineRegisterInfo &MRI,
2044  const TargetInstrInfo *TII) {
2045  if (!TargetRegisterInfo::isVirtualRegister(Reg))
2046  return nullptr;
2047  if (!MRI.hasOneNonDBGUse(Reg))
2048  return nullptr;
2049  MachineInstr *MI = MRI.getVRegDef(Reg);
2050  if (!MI)
2051  return nullptr;
2052  // MI is folded into the MOVCC by predicating it.
2053  if (!MI->isPredicable())
2054  return nullptr;
2055  // Check if MI has any non-dead defs or physreg uses. This also detects
2056  // predicated instructions which will be reading CPSR.
2057  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
2058  const MachineOperand &MO = MI->getOperand(i);
2059  // Reject frame index operands, PEI can't handle the predicated pseudos.
2060  if (MO.isFI() || MO.isCPI() || MO.isJTI())
2061  return nullptr;
2062  if (!MO.isReg())
2063  continue;
2064  // MI can't have any tied operands, that would conflict with predication.
2065  if (MO.isTied())
2066  return nullptr;
2067  if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
2068  return nullptr;
2069  if (MO.isDef() && !MO.isDead())
2070  return nullptr;
2071  }
2072  bool DontMoveAcrossStores = true;
2073  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2074  return nullptr;
2075  return MI;
2076 }
2077 
2078 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2079  SmallVectorImpl<MachineOperand> &Cond,
2080  unsigned &TrueOp, unsigned &FalseOp,
2081  bool &Optimizable) const {
2082  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2083  "Unknown select instruction");
2084  // MOVCC operands:
2085  // 0: Def.
2086  // 1: True use.
2087  // 2: False use.
2088  // 3: Condition code.
2089  // 4: CPSR use.
2090  TrueOp = 1;
2091  FalseOp = 2;
2092  Cond.push_back(MI.getOperand(3));
2093  Cond.push_back(MI.getOperand(4));
2094  // We can always fold a def.
2095  Optimizable = true;
2096  return false;
2097 }
2098 
2099 MachineInstr *
2100 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2101  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2102  bool PreferFalse) const {
2103  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2104  "Unknown select instruction");
2105  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2106  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2107  bool Invert = !DefMI;
2108  if (!DefMI)
2109  DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2110  if (!DefMI)
2111  return nullptr;
2112 
2113  // Find new register class to use.
2114  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2115  unsigned DestReg = MI.getOperand(0).getReg();
2116  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
2117  if (!MRI.constrainRegClass(DestReg, PreviousClass))
2118  return nullptr;
2119 
2120  // Create a new predicated version of DefMI.
2121  // Rfalse is the first use.
2122  MachineInstrBuilder NewMI =
2123  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2124 
2125  // Copy all the DefMI operands, excluding its (null) predicate.
2126  const MCInstrDesc &DefDesc = DefMI->getDesc();
2127  for (unsigned i = 1, e = DefDesc.getNumOperands();
2128  i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2129  NewMI.add(DefMI->getOperand(i));
2130 
2131  unsigned CondCode = MI.getOperand(3).getImm();
2132  if (Invert)
2133  NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2134  else
2135  NewMI.addImm(CondCode);
2136  NewMI.add(MI.getOperand(4));
2137 
2138  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2139  if (NewMI->hasOptionalDef())
2140  NewMI.add(condCodeOp());
2141 
2142  // The output register value when the predicate is false is an implicit
2143  // register operand tied to the first def.
2144  // The tie makes the register allocator ensure the FalseReg is allocated the
2145  // same register as operand 0.
2146  FalseReg.setImplicit();
2147  NewMI.add(FalseReg);
2148  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2149 
2150  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2151  SeenMIs.insert(NewMI);
2152  SeenMIs.erase(DefMI);
2153 
2154  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2155  // DefMI would be invalid when transferred inside the loop. Checking for a
2156  // loop is expensive, but at least remove kill flags if they are in different
2157  // BBs.
2158  if (DefMI->getParent() != MI.getParent())
2159  NewMI->clearKillInfo();
2160 
2161  // The caller will erase MI, but not DefMI.
2162  DefMI->eraseFromParent();
2163  return NewMI;
2164 }
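// Illustrative before/after for the select optimisation above (not from the
// original source, operand syntax approximate): given
//   %1:gpr = ADDri %0, 1, 14 /* al */, %noreg, %noreg
//   %2:gpr = MOVCCr %1, %3, 0 /* eq */, %cpsr
// the ADD is predicated and folded into the select, producing roughly
//   %2:gpr = ADDri %0, 1, 0 /* eq */, %cpsr, %noreg, implicit %3(tied-def 0)
// so the false-value register %3 is tied to the result and the MOVCC goes away.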
2165 
2166 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2167 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2168 /// def operand.
2169 ///
2170 /// This will go away once we can teach tblgen how to set the optional CPSR def
2171 /// operand itself.
2172 struct AddSubFlagsOpcodePair {
2173  uint16_t PseudoOpc;
2174  uint16_t MachineOpc;
2175 };
2176 
2177 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2178  {ARM::ADDSri, ARM::ADDri},
2179  {ARM::ADDSrr, ARM::ADDrr},
2180  {ARM::ADDSrsi, ARM::ADDrsi},
2181  {ARM::ADDSrsr, ARM::ADDrsr},
2182 
2183  {ARM::SUBSri, ARM::SUBri},
2184  {ARM::SUBSrr, ARM::SUBrr},
2185  {ARM::SUBSrsi, ARM::SUBrsi},
2186  {ARM::SUBSrsr, ARM::SUBrsr},
2187 
2188  {ARM::RSBSri, ARM::RSBri},
2189  {ARM::RSBSrsi, ARM::RSBrsi},
2190  {ARM::RSBSrsr, ARM::RSBrsr},
2191 
2192  {ARM::tADDSi3, ARM::tADDi3},
2193  {ARM::tADDSi8, ARM::tADDi8},
2194  {ARM::tADDSrr, ARM::tADDrr},
2195  {ARM::tADCS, ARM::tADC},
2196 
2197  {ARM::tSUBSi3, ARM::tSUBi3},
2198  {ARM::tSUBSi8, ARM::tSUBi8},
2199  {ARM::tSUBSrr, ARM::tSUBrr},
2200  {ARM::tSBCS, ARM::tSBC},
2201  {ARM::tRSBS, ARM::tRSB},
2202 
2203  {ARM::t2ADDSri, ARM::t2ADDri},
2204  {ARM::t2ADDSrr, ARM::t2ADDrr},
2205  {ARM::t2ADDSrs, ARM::t2ADDrs},
2206 
2207  {ARM::t2SUBSri, ARM::t2SUBri},
2208  {ARM::t2SUBSrr, ARM::t2SUBrr},
2209  {ARM::t2SUBSrs, ARM::t2SUBrs},
2210 
2211  {ARM::t2RSBSri, ARM::t2RSBri},
2212  {ARM::t2RSBSrs, ARM::t2RSBrs},
2213 };
2214 
2215 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2216  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2217  if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2218  return AddSubFlagsOpcodeMap[i].MachineOpc;
2219  return 0;
2220 }
2221 
2222 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2223  MachineBasicBlock::iterator &MBBI,
2224  const DebugLoc &dl, unsigned DestReg,
2225  unsigned BaseReg, int NumBytes,
2226  ARMCC::CondCodes Pred, unsigned PredReg,
2227  const ARMBaseInstrInfo &TII,
2228  unsigned MIFlags) {
2229  if (NumBytes == 0 && DestReg != BaseReg) {
2230  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2231  .addReg(BaseReg, RegState::Kill)
2232  .add(predOps(Pred, PredReg))
2233  .add(condCodeOp())
2234  .setMIFlags(MIFlags);
2235  return;
2236  }
2237 
2238  bool isSub = NumBytes < 0;
2239  if (isSub) NumBytes = -NumBytes;
2240 
2241  while (NumBytes) {
2242  unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2243  unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2244  assert(ThisVal && "Didn't extract field correctly");
2245 
2246  // We will handle these bits from the offset; clear them.
2247  NumBytes &= ~ThisVal;
2248 
2249  assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2250 
2251  // Build the new ADD / SUB.
2252  unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2253  BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2254  .addReg(BaseReg, RegState::Kill)
2255  .addImm(ThisVal)
2256  .add(predOps(Pred, PredReg))
2257  .add(condCodeOp())
2258  .setMIFlags(MIFlags);
2259  BaseReg = DestReg;
2260  }
2261 }
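// Illustrative example for the chunking loop above (not from the original
// source): NumBytes = 0x101 is not a single ARM so_imm (its set bits span
// more than an 8-bit rotated window), so two adds are emitted, first adding
// #0x1 and then #0x100, with BaseReg updated to DestReg between iterations.
// Each chunk individually satisfies ARM_AM::getSOImmVal.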
2262 
2263 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2264  MachineFunction &MF, MachineInstr *MI,
2265  unsigned NumBytes) {
2266  // This optimisation potentially adds lots of load and store
2267  // micro-operations; it's only really beneficial for code size.
2268  if (!MF.getFunction().optForMinSize())
2269  return false;
2270 
2271  // If only one register is pushed/popped, LLVM can use an LDR/STR
2272  // instead. We can't modify those so make sure we're dealing with an
2273  // instruction we understand.
2274  bool IsPop = isPopOpcode(MI->getOpcode());
2275  bool IsPush = isPushOpcode(MI->getOpcode());
2276  if (!IsPush && !IsPop)
2277  return false;
2278 
2279  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2280  MI->getOpcode() == ARM::VLDMDIA_UPD;
2281  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2282  MI->getOpcode() == ARM::tPOP ||
2283  MI->getOpcode() == ARM::tPOP_RET;
2284 
2285  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2286  MI->getOperand(1).getReg() == ARM::SP)) &&
2287  "trying to fold sp update into non-sp-updating push/pop");
2288 
2289  // The VFP push & pop act on D-registers, so we can only correctly fold an
2290  // adjustment that is a multiple of 8 bytes. Similarly, GPRs are 4 bytes each.
2291  // Don't try if this is violated.
2292  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2293  return false;
2294 
2295  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2296  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2297  int RegListIdx = IsT1PushPop ? 2 : 4;
2298 
2299  // Calculate the space we'll need in terms of registers.
2300  unsigned RegsNeeded;
2301  const TargetRegisterClass *RegClass;
2302  if (IsVFPPushPop) {
2303  RegsNeeded = NumBytes / 8;
2304  RegClass = &ARM::DPRRegClass;
2305  } else {
2306  RegsNeeded = NumBytes / 4;
2307  RegClass = &ARM::GPRRegClass;
2308  }
2309 
2310  // We're going to have to strip all list operands off before
2311  // re-adding them since the order matters, so save the existing ones
2312  // for later.
2313  SmallVector<MachineOperand, 4> RegList;
2314 
2315  // We're also going to need the first register transferred by this
2316  // instruction, which won't necessarily be the first register in the list.
2317  unsigned FirstRegEnc = -1;
2318 
2320  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2321  MachineOperand &MO = MI->getOperand(i);
2322  RegList.push_back(MO);
2323 
2324  if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2325  FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2326  }
2327 
2328  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2329 
2330  // Now try to find enough space in the reglist to allocate NumBytes.
2331  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2332  --CurRegEnc) {
2333  unsigned CurReg = RegClass->getRegister(CurRegEnc);
2334  if (!IsPop) {
2335  // Pushing any register is completely harmless; mark the register involved
2336  // as undef since we don't care about its value and must not restore it
2337  // during stack unwinding.
2338  RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2339  false, false, true));
2340  --RegsNeeded;
2341  continue;
2342  }
2343 
2344  // However, we can only pop an extra register if it's not live. For
2345  // registers live within the function we might clobber a return value
2346  // register; the other way a register can be live here is if it's
2347  // callee-saved.
2348  if (isCalleeSavedRegister(CurReg, CSRegs) ||
2349  MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2350  MachineBasicBlock::LQR_Dead) {
2351  // VFP pops don't allow holes in the register list, so any skip is fatal
2352  // for our transformation. GPR pops do, so we should just keep looking.
2353  if (IsVFPPushPop)
2354  return false;
2355  else
2356  continue;
2357  }
2358 
2359  // Mark the unimportant registers as <def,dead> in the POP.
2360  RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2361  true));
2362  --RegsNeeded;
2363  }
2364 
2365  if (RegsNeeded > 0)
2366  return false;
2367 
2368  // Finally we know we can profitably perform the optimisation so go
2369  // ahead: strip all existing registers off and add them back again
2370  // in the right order.
2371  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2372  MI->RemoveOperand(i);
2373 
2374  // Add the complete list back in.
2375  MachineInstrBuilder MIB(MF, &*MI);
2376  for (int i = RegList.size() - 1; i >= 0; --i)
2377  MIB.add(RegList[i]);
2378 
2379  return true;
2380 }
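// Illustrative example (not from the original source): under minsize, an
// 8-byte SP decrement that accompanies a tPUSH of {r4, lr} can be folded away
// by pushing two extra scratch registers with lower encodings instead, e.g.
// tPUSH {r2, r3, r4, lr} with the added registers marked undef; the matching
// pop would mark its extra registers dead rather than restoring real values.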
2381 
2382 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2383  unsigned FrameReg, int &Offset,
2384  const ARMBaseInstrInfo &TII) {
2385  unsigned Opcode = MI.getOpcode();
2386  const MCInstrDesc &Desc = MI.getDesc();
2387  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2388  bool isSub = false;
2389 
2390  // Memory operands in inline assembly always use AddrMode2.
2391  if (Opcode == ARM::INLINEASM)
2392  AddrMode = ARMII::AddrMode2;
2393 
2394  if (Opcode == ARM::ADDri) {
2395  Offset += MI.getOperand(FrameRegIdx+1).getImm();
2396  if (Offset == 0) {
2397  // Turn it into a move.
2398  MI.setDesc(TII.get(ARM::MOVr));
2399  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2400  MI.RemoveOperand(FrameRegIdx+1);
2401  Offset = 0;
2402  return true;
2403  } else if (Offset < 0) {
2404  Offset = -Offset;
2405  isSub = true;
2406  MI.setDesc(TII.get(ARM::SUBri));
2407  }
2408 
2409  // Common case: small offset, fits into instruction.
2410  if (ARM_AM::getSOImmVal(Offset) != -1) {
2411  // Replace the FrameIndex with sp / fp
2412  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2413  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2414  Offset = 0;
2415  return true;
2416  }
2417 
2418  // Otherwise, pull as much of the immediate into this ADDri/SUBri
2419  // as possible.
2420  unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2421  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2422 
2423  // We will handle these bits from the offset; clear them.
2424  Offset &= ~ThisImmVal;
2425 
2426  // Get the properly encoded SOImmVal field.
2427  assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2428  "Bit extraction didn't work?");
2429  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2430  } else {
2431  unsigned ImmIdx = 0;
2432  int InstrOffs = 0;
2433  unsigned NumBits = 0;
2434  unsigned Scale = 1;
2435  switch (AddrMode) {
2436  case ARMII::AddrMode_i12:
2437  ImmIdx = FrameRegIdx + 1;
2438  InstrOffs = MI.getOperand(ImmIdx).getImm();
2439  NumBits = 12;
2440  break;
2441  case ARMII::AddrMode2:
2442  ImmIdx = FrameRegIdx+2;
2443  InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2444  if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2445  InstrOffs *= -1;
2446  NumBits = 12;
2447  break;
2448  case ARMII::AddrMode3:
2449  ImmIdx = FrameRegIdx+2;
2450  InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2451  if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2452  InstrOffs *= -1;
2453  NumBits = 8;
2454  break;
2455  case ARMII::AddrMode4:
2456  case ARMII::AddrMode6:
2457  // Can't fold any offset even if it's zero.
2458  return false;
2459  case ARMII::AddrMode5:
2460  ImmIdx = FrameRegIdx+1;
2461  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2462  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2463  InstrOffs *= -1;
2464  NumBits = 8;
2465  Scale = 4;
2466  break;
2467  case ARMII::AddrMode5FP16:
2468  ImmIdx = FrameRegIdx+1;
2469  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2470  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2471  InstrOffs *= -1;
2472  NumBits = 8;
2473  Scale = 2;
2474  break;
2475  default:
2476  llvm_unreachable("Unsupported addressing mode!");
2477  }
2478 
2479  Offset += InstrOffs * Scale;
2480  assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2481  if (Offset < 0) {
2482  Offset = -Offset;
2483  isSub = true;
2484  }
2485 
2486  // Attempt to fold the address computation if the opcode has offset bits.
2487  if (NumBits > 0) {
2488  // Common case: small offset, fits into instruction.
2489  MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2490  int ImmedOffset = Offset / Scale;
2491  unsigned Mask = (1 << NumBits) - 1;
2492  if ((unsigned)Offset <= Mask * Scale) {
2493  // Replace the FrameIndex with sp
2494  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2495  // FIXME: When addrmode2 goes away, this will simplify (like the
2496  // T2 version), as the LDR.i12 versions don't need the encoding
2497  // tricks for the offset value.
2498  if (isSub) {
2499  if (AddrMode == ARMII::AddrMode_i12)
2500  ImmedOffset = -ImmedOffset;
2501  else
2502  ImmedOffset |= 1 << NumBits;
2503  }
2504  ImmOp.ChangeToImmediate(ImmedOffset);
2505  Offset = 0;
2506  return true;
2507  }
2508 
2509  // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2510  ImmedOffset = ImmedOffset & Mask;
2511  if (isSub) {
2512  if (AddrMode == ARMII::AddrMode_i12)
2513  ImmedOffset = -ImmedOffset;
2514  else
2515  ImmedOffset |= 1 << NumBits;
2516  }
2517  ImmOp.ChangeToImmediate(ImmedOffset);
2518  Offset &= ~(Mask*Scale);
2519  }
2520  }
2521 
2522  Offset = (isSub) ? -Offset : Offset;
2523  return Offset == 0;
2524 }
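// Sketch of the AddrMode_i12 path above (illustrative, not from the original
// source): for an LDRi12 whose frame index resolves to FrameReg + 64 and whose
// existing immediate operand is 12, the combined offset 64 + 12 = 76 fits the
// 12-bit field, so the frame-index operand is rewritten to FrameReg, the
// immediate becomes 76, and the function returns true with Offset left at 0.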
2525 
2526 /// analyzeCompare - For a comparison instruction, return the source registers
2527 /// in SrcReg and SrcReg2 if it has two register operands, and the value it
2528 /// compares against in CmpValue. Return true if the comparison instruction
2529 /// can be analyzed.
2530 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
2531  unsigned &SrcReg2, int &CmpMask,
2532  int &CmpValue) const {
2533  switch (MI.getOpcode()) {
2534  default: break;
2535  case ARM::CMPri:
2536  case ARM::t2CMPri:
2537  case ARM::tCMPi8:
2538  SrcReg = MI.getOperand(0).getReg();
2539  SrcReg2 = 0;
2540  CmpMask = ~0;
2541  CmpValue = MI.getOperand(1).getImm();
2542  return true;
2543  case ARM::CMPrr:
2544  case ARM::t2CMPrr:
2545  SrcReg = MI.getOperand(0).getReg();
2546  SrcReg2 = MI.getOperand(1).getReg();
2547  CmpMask = ~0;
2548  CmpValue = 0;
2549  return true;
2550  case ARM::TSTri:
2551  case ARM::t2TSTri:
2552  SrcReg = MI.getOperand(0).getReg();
2553  SrcReg2 = 0;
2554  CmpMask = MI.getOperand(1).getImm();
2555  CmpValue = 0;
2556  return true;
2557  }
2558 
2559  return false;
2560 }
2561 
2562 /// isSuitableForMask - Identify a suitable 'and' instruction that
2563 /// operates on the given source register and applies the same mask
2564 /// as a 'tst' instruction. Provide a limited look-through for copies.
2565 /// When successful, MI will hold the found instruction.
2566 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2567  int CmpMask, bool CommonUse) {
2568  switch (MI->getOpcode()) {
2569  case ARM::ANDri:
2570  case ARM::t2ANDri:
2571  if (CmpMask != MI->getOperand(2).getImm())
2572  return false;
2573  if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2574  return true;
2575  break;
2576  }
2577 
2578  return false;
2579 }
2580 
2581 /// getSwappedCondition - assume the flags are set by MI(a,b), return
2582 /// the condition code if we modify the instructions such that flags are
2583 /// set by MI(b,a).
2585  switch (CC) {
2586  default: return ARMCC::AL;
2587  case ARMCC::EQ: return ARMCC::EQ;
2588  case ARMCC::NE: return ARMCC::NE;
2589  case ARMCC::HS: return ARMCC::LS;
2590  case ARMCC::LO: return ARMCC::HI;
2591  case ARMCC::HI: return ARMCC::LO;
2592  case ARMCC::LS: return ARMCC::HS;
2593  case ARMCC::GE: return ARMCC::LE;
2594  case ARMCC::LT: return ARMCC::GT;
2595  case ARMCC::GT: return ARMCC::LT;
2596  case ARMCC::LE: return ARMCC::GE;
2597  }
2598 }
2599 
2600 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2601 /// the condition code if we modify the instructions such that flags are
2602 /// set by ADD(a,b,X).
2603 inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2604  switch (CC) {
2605  default: return ARMCC::AL;
2606  case ARMCC::HS: return ARMCC::LO;
2607  case ARMCC::LO: return ARMCC::HS;
2608  case ARMCC::VS: return ARMCC::VS;
2609  case ARMCC::VC: return ARMCC::VC;
2610  }
2611 }
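// Illustrative use of the two helpers above (not from the original source):
// a user that tested HS against CMP(r0, r1) must test LO once the flags come
// from ADD(r0, r1, X), and a GE user of CMP(r0, r1) must test LE when the
// flags are instead produced by SUB(r1, r0).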
2612 
2613 /// isRedundantFlagInstr - check whether the first instruction, whose only
2614 /// purpose is to update flags, can be made redundant.
2615 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2616 /// CMPri can be made redundant by SUBri if the operands are the same.
2617 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2618 /// This function can be extended later on.
2619 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2620  unsigned SrcReg, unsigned SrcReg2,
2621  int ImmValue, const MachineInstr *OI) {
2622  if ((CmpI->getOpcode() == ARM::CMPrr ||
2623  CmpI->getOpcode() == ARM::t2CMPrr) &&
2624  (OI->getOpcode() == ARM::SUBrr ||
2625  OI->getOpcode() == ARM::t2SUBrr) &&
2626  ((OI->getOperand(1).getReg() == SrcReg &&
2627  OI->getOperand(2).getReg() == SrcReg2) ||
2628  (OI->getOperand(1).getReg() == SrcReg2 &&
2629  OI->getOperand(2).getReg() == SrcReg)))
2630  return true;
2631 
2632  if ((CmpI->getOpcode() == ARM::CMPri ||
2633  CmpI->getOpcode() == ARM::t2CMPri) &&
2634  (OI->getOpcode() == ARM::SUBri ||
2635  OI->getOpcode() == ARM::t2SUBri) &&
2636  OI->getOperand(1).getReg() == SrcReg &&
2637  OI->getOperand(2).getImm() == ImmValue)
2638  return true;
2639 
2640  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2641  (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2642  OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2643  OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2644  OI->getOperand(0).getReg() == SrcReg &&
2645  OI->getOperand(1).getReg() == SrcReg2)
2646  return true;
2647  return false;
2648 }
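// Illustrative example (not from the original source): for
//   %2 = SUBrr %0, %1      ; candidate OI
//   CMPrr %0, %1           ; CmpI
// the helper above reports the compare as redundant, because turning the SUB
// into a flag-setting SUBS reproduces exactly the flags the CMP would compute.
// The swapped form CMPrr(%1, %0) is also accepted, but then the users'
// condition codes must be adjusted via getSwappedCondition.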
2649 
2650 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2651  switch (MI->getOpcode()) {
2652  default: return false;
2653  case ARM::tLSLri:
2654  case ARM::tLSRri:
2655  case ARM::tLSLrr:
2656  case ARM::tLSRrr:
2657  case ARM::tSUBrr:
2658  case ARM::tADDrr:
2659  case ARM::tADDi3:
2660  case ARM::tADDi8:
2661  case ARM::tSUBi3:
2662  case ARM::tSUBi8:
2663  case ARM::tMUL:
2664  IsThumb1 = true;
2665  LLVM_FALLTHROUGH;
2666  case ARM::RSBrr:
2667  case ARM::RSBri:
2668  case ARM::RSCrr:
2669  case ARM::RSCri:
2670  case ARM::ADDrr:
2671  case ARM::ADDri:
2672  case ARM::ADCrr:
2673  case ARM::ADCri:
2674  case ARM::SUBrr:
2675  case ARM::SUBri:
2676  case ARM::SBCrr:
2677  case ARM::SBCri:
2678  case ARM::t2RSBri:
2679  case ARM::t2ADDrr:
2680  case ARM::t2ADDri:
2681  case ARM::t2ADCrr:
2682  case ARM::t2ADCri:
2683  case ARM::t2SUBrr:
2684  case ARM::t2SUBri:
2685  case ARM::t2SBCrr:
2686  case ARM::t2SBCri:
2687  case ARM::ANDrr:
2688  case ARM::ANDri:
2689  case ARM::t2ANDrr:
2690  case ARM::t2ANDri:
2691  case ARM::ORRrr:
2692  case ARM::ORRri:
2693  case ARM::t2ORRrr:
2694  case ARM::t2ORRri:
2695  case ARM::EORrr:
2696  case ARM::EORri:
2697  case ARM::t2EORrr:
2698  case ARM::t2EORri:
2699  case ARM::t2LSRri:
2700  case ARM::t2LSRrr:
2701  case ARM::t2LSLri:
2702  case ARM::t2LSLrr:
2703  return true;
2704  }
2705 }
2706 
2707 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2708 /// comparison into one that sets the zero bit in the flags register; remove a
2709 /// redundant Compare instruction if an earlier instruction can set the
2710 /// flags in the same way as Compare.
2711 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2712 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2713 /// condition code of instructions which use the flags.
2714 bool ARMBaseInstrInfo::optimizeCompareInstr(
2715  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
2716  int CmpValue, const MachineRegisterInfo *MRI) const {
2717  // Get the unique definition of SrcReg.
2718  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2719  if (!MI) return false;
2720 
2721  // Masked compares sometimes use the same register as the corresponding 'and'.
2722  if (CmpMask != ~0) {
2723  if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2724  MI = nullptr;
2725  for (MachineRegisterInfo::use_instr_iterator
2726  UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2727  UI != UE; ++UI) {
2728  if (UI->getParent() != CmpInstr.getParent())
2729  continue;
2730  MachineInstr *PotentialAND = &*UI;
2731  if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2732  isPredicated(*PotentialAND))
2733  continue;
2734  MI = PotentialAND;
2735  break;
2736  }
2737  if (!MI) return false;
2738  }
2739  }
2740 
2741  // Get ready to iterate backward from CmpInstr.
2742  MachineBasicBlock::iterator I = CmpInstr, E = MI,
2743  B = CmpInstr.getParent()->begin();
2744 
2745  // Early exit if CmpInstr is at the beginning of the BB.
2746  if (I == B) return false;
2747 
2748  // There are two possible candidates which can be changed to set CPSR:
2749  // One is MI, the other is a SUB or ADD instruction.
2750  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2751  // ADDr[ri](r1, r2, X).
2752  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2753  MachineInstr *SubAdd = nullptr;
2754  if (SrcReg2 != 0)
2755  // MI is not a candidate for CMPrr.
2756  MI = nullptr;
2757  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2758  // Conservatively refuse to convert an instruction which isn't in the same
2759  // BB as the comparison.
2760  // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2761  // Thus we cannot return here.
2762  if (CmpInstr.getOpcode() == ARM::CMPri ||
2763  CmpInstr.getOpcode() == ARM::t2CMPri)
2764  MI = nullptr;
2765  else
2766  return false;
2767  }
2768 
2769  bool IsThumb1 = false;
2770  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2771  return false;
2772 
2773  // We also want to do this peephole for cases like this: if (a*b == 0),
2774  // and optimise away the CMP instruction from the generated code sequence:
2775  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2776  // resulting from the select instruction, but these MOVS instructions for
2777  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2778  // However, if we only have MOVS instructions in between the CMP and the
2779  // other instruction (the MULS in this example), then the CPSR is dead so we
2780  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2781  // reordering and then continue the analysis hoping we can eliminate the
2782  // CMP. This peephole works on the vregs, so is still in SSA form. As a
2783  // consequence, the movs won't redefine/kill the MUL operands which would
2784  // make this reordering illegal.
2785  if (MI && IsThumb1) {
2786  --I;
2787  bool CanReorder = true;
2788  const bool HasStmts = I != E;
2789  for (; I != E; --I) {
2790  if (I->getOpcode() != ARM::tMOVi8) {
2791  CanReorder = false;
2792  break;
2793  }
2794  }
2795  if (HasStmts && CanReorder) {
2796  MI = MI->removeFromParent();
2797  E = CmpInstr;
2798  CmpInstr.getParent()->insert(E, MI);
2799  }
2800  I = CmpInstr;
2801  E = MI;
2802  }
2803 
2804  // Check that CPSR isn't set between the comparison instruction and the one we
2805  // want to change. At the same time, search for SubAdd.
2806  const TargetRegisterInfo *TRI = &getRegisterInfo();
2807  do {
2808  const MachineInstr &Instr = *--I;
2809 
2810  // Check whether CmpInstr can be made redundant by the current instruction.
2811  if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
2812  SubAdd = &*I;
2813  break;
2814  }
2815 
2816  // Allow E (which was initially MI) to be SubAdd but do not search before E.
2817  if (I == E)
2818  break;
2819 
2820  if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2821  Instr.readsRegister(ARM::CPSR, TRI))
2822  // This instruction modifies or uses CPSR after the one we want to
2823  // change. We can't do this transformation.
2824  return false;
2825 
2826  } while (I != B);
2827 
2828  // Return false if no candidates exist.
2829  if (!MI && !SubAdd)
2830  return false;
2831 
2832  // The single candidate is called MI.
2833  if (!MI) MI = SubAdd;
2834 
2835  // We can't use a predicated instruction - it doesn't always write the flags.
2836  if (isPredicated(*MI))
2837  return false;
2838 
2839  // Scan forward for the use of CPSR
2840  // When checking against MI: if it's a conditional code that requires
2841  // checking of the V bit or C bit, then this is not safe to do.
2842  // It is safe to remove CmpInstr if CPSR is redefined or killed.
2843  // If we are done with the basic block, we need to check whether CPSR is
2844  // live-out.
2845  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
2846  OperandsToUpdate;
2847  bool isSafe = false;
2848  I = CmpInstr;
2849  E = CmpInstr.getParent()->end();
2850  while (!isSafe && ++I != E) {
2851  const MachineInstr &Instr = *I;
2852  for (unsigned IO = 0, EO = Instr.getNumOperands();
2853  !isSafe && IO != EO; ++IO) {
2854  const MachineOperand &MO = Instr.getOperand(IO);
2855  if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2856  isSafe = true;
2857  break;
2858  }
2859  if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2860  continue;
2861  if (MO.isDef()) {
2862  isSafe = true;
2863  break;
2864  }
2865  // The condition code operand is just before the CPSR operand, except for VSELs.
2866  ARMCC::CondCodes CC;
2867  bool IsInstrVSel = true;
2868  switch (Instr.getOpcode()) {
2869  default:
2870  IsInstrVSel = false;
2871  CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
2872  break;
2873  case ARM::VSELEQD:
2874  case ARM::VSELEQS:
2875  CC = ARMCC::EQ;
2876  break;
2877  case ARM::VSELGTD:
2878  case ARM::VSELGTS:
2879  CC = ARMCC::GT;
2880  break;
2881  case ARM::VSELGED:
2882  case ARM::VSELGES:
2883  CC = ARMCC::GE;
2884  break;
2885  case ARM::VSELVSS:
2886  case ARM::VSELVSD:
2887  CC = ARMCC::VS;
2888  break;
2889  }
2890 
2891  if (SubAdd) {
2892  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2893  // on CMP needs to be updated to be based on SUB.
2894  // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
2895  // needs to be modified.
2896  // Push the condition code operands to OperandsToUpdate.
2897  // If it is safe to remove CmpInstr, the condition code of these
2898  // operands will be modified.
2899  unsigned Opc = SubAdd->getOpcode();
2900  bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
2901  Opc == ARM::SUBri || Opc == ARM::t2SUBri;
2902  if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
2903  SubAdd->getOperand(2).getReg() == SrcReg)) {
2904  // VSel doesn't support condition code update.
2905  if (IsInstrVSel)
2906  return false;
2907  // Ensure we can swap the condition.
2908  ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
2909  if (NewCC == ARMCC::AL)
2910  return false;
2911  OperandsToUpdate.push_back(
2912  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
2913  }
2914  } else {
2915  // No SubAdd, so this is x = <op> y, z; cmp x, 0.
2916  switch (CC) {
2917  case ARMCC::EQ: // Z
2918  case ARMCC::NE: // Z
2919  case ARMCC::MI: // N
2920  case ARMCC::PL: // N
2921  case ARMCC::AL: // none
2922  // CPSR can be used multiple times, we should continue.
2923  break;
2924  case ARMCC::HS: // C
2925  case ARMCC::LO: // C
2926  case ARMCC::VS: // V
2927  case ARMCC::VC: // V
2928  case ARMCC::HI: // C Z
2929  case ARMCC::LS: // C Z
2930  case ARMCC::GE: // N V
2931  case ARMCC::LT: // N V
2932  case ARMCC::GT: // Z N V
2933  case ARMCC::LE: // Z N V
2934  // The instruction uses the V bit or C bit which is not safe.
2935  return false;
2936  }
2937  }
2938  }
2939  }
2940 
2941  // If CPSR is not killed nor re-defined, we should check whether it is
2942  // live-out. If it is live-out, do not optimize.
2943  if (!isSafe) {
2944  MachineBasicBlock *MBB = CmpInstr.getParent();
2945  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2946  SE = MBB->succ_end(); SI != SE; ++SI)
2947  if ((*SI)->isLiveIn(ARM::CPSR))
2948  return false;
2949  }
2950 
2951  // Toggle the optional operand to CPSR (if it exists; in Thumb1 we always
2952  // set CPSR, so it is represented as an explicit output).
2953  if (!IsThumb1) {
2954  MI->getOperand(5).setReg(ARM::CPSR);
2955  MI->getOperand(5).setIsDef(true);
2956  }
2957  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
2958  CmpInstr.eraseFromParent();
2959 
2960  // Modify the condition code of operands in OperandsToUpdate.
2961  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2962  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2963  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
2964  OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
2965 
2966  MI->clearRegisterDeads(ARM::CPSR);
2967 
2968  return true;
2969 }
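// Illustrative before/after for optimizeCompareInstr (not from the original
// source, operand syntax approximate):
//   %2:gpr = SUBrr %0, %1, 14 /* al */, %noreg, %noreg
//   CMPrr %0, %1, 14 /* al */, %noreg, implicit-def %cpsr
//   Bcc %bb.1, 0 /* eq */, %cpsr
// becomes, once the compare is removed and the SUB's optional CPSR def is
// enabled,
//   %2:gpr = SUBrr %0, %1, 14 /* al */, %noreg, def %cpsr
//   Bcc %bb.1, 0 /* eq */, %cpsr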
2970 
2971 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
2972  // Do not sink MI if it might be used to optimize a redundant compare.
2973  // We heuristically only look at the instruction immediately following MI to
2974  // avoid potentially searching the entire basic block.
2975  if (isPredicated(MI))
2976  return true;
2977  MachineBasicBlock::const_iterator Next = &MI;
2978  ++Next;
2979  unsigned SrcReg, SrcReg2;
2980  int CmpMask, CmpValue;
2981  if (Next != MI.getParent()->end() &&
2982  analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
2983  isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
2984  return false;
2985  return true;
2986 }
2987 
2988 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2989  unsigned Reg,
2990  MachineRegisterInfo *MRI) const {
2991  // Fold large immediates into add, sub, or, xor.
2992  unsigned DefOpc = DefMI.getOpcode();
2993  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
2994  return false;
2995  if (!DefMI.getOperand(1).isImm())
2996  // Could be t2MOVi32imm @xx
2997  return false;
2998 
2999  if (!MRI->hasOneNonDBGUse(Reg))
3000  return false;
3001 
3002  const MCInstrDesc &DefMCID = DefMI.getDesc();
3003  if (DefMCID.hasOptionalDef()) {
3004  unsigned NumOps = DefMCID.getNumOperands();
3005  const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3006  if (MO.getReg() == ARM::CPSR && !MO.isDead())
3007  // If DefMI defines CPSR and it is not dead, it's obviously not safe
3008  // to delete DefMI.
3009  return false;
3010  }
3011 
3012  const MCInstrDesc &UseMCID = UseMI.getDesc();
3013  if (UseMCID.hasOptionalDef()) {
3014  unsigned NumOps = UseMCID.getNumOperands();
3015  if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3016  // If the instruction sets the flag, do not attempt this optimization
3017  // since it may change the semantics of the code.
3018  return false;
3019  }
3020 
3021  unsigned UseOpc = UseMI.getOpcode();
3022  unsigned NewUseOpc = 0;
3023  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3024  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3025  bool Commute = false;
3026  switch (UseOpc) {
3027  default: return false;
3028  case ARM::SUBrr:
3029  case ARM::ADDrr:
3030  case ARM::ORRrr:
3031  case ARM::EORrr:
3032  case ARM::t2SUBrr:
3033  case ARM::t2ADDrr:
3034  case ARM::t2ORRrr:
3035  case ARM::t2EORrr: {
3036  Commute = UseMI.getOperand(2).getReg() != Reg;
3037  switch (UseOpc) {
3038  default: break;
3039  case ARM::ADDrr:
3040  case ARM::SUBrr:
3041  if (UseOpc == ARM::SUBrr && Commute)
3042  return false;
3043 
3044  // ADD/SUB are special because they're essentially the same operation, so
3045  // we can handle a larger range of immediates.
3046  if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3047  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3048  else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3049  ImmVal = -ImmVal;
3050  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3051  } else
3052  return false;
3053  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3054  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3055  break;
3056  case ARM::ORRrr:
3057  case ARM::EORrr:
3058  if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3059  return false;
3060  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3061  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3062  switch (UseOpc) {
3063  default: break;
3064  case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3065  case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3066  }
3067  break;
3068  case ARM::t2ADDrr:
3069  case ARM::t2SUBrr:
3070  if (UseOpc == ARM::t2SUBrr && Commute)
3071  return false;
3072 
3073  // ADD/SUB are special because they're essentially the same operation, so
3074  // we can handle a larger range of immediates.
3075  if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3076  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
3077  else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3078  ImmVal = -ImmVal;
3079  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
3080  } else
3081  return false;
3082  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3083  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3084  break;
3085  case ARM::t2ORRrr:
3086  case ARM::t2EORrr:
3087  if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3088  return false;
3089  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3090  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3091  switch (UseOpc) {
3092  default: break;
3093  case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3094  case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3095  }
3096  break;
3097  }
3098  }
3099  }
3100 
3101  unsigned OpIdx = Commute ? 2 : 1;
3102  unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
3103  bool isKill = UseMI.getOperand(OpIdx).isKill();
3104  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
3105  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3106  NewReg)
3107  .addReg(Reg1, getKillRegState(isKill))
3108  .addImm(SOImmValV1)
3109  .add(predOps(ARMCC::AL))
3110  .add(condCodeOp());
3111  UseMI.setDesc(get(NewUseOpc));
3112  UseMI.getOperand(1).setReg(NewReg);
3113  UseMI.getOperand(1).setIsKill();
3114  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3115  DefMI.eraseFromParent();
3116  return true;
3117 }
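// Illustrative example for FoldImmediate (not from the original source): with
//   %1:gpr = MOVi32imm 0x10001
//   %2:gpr = ADDrr %0, %1, ...
// the constant is not a single so_imm but is a valid two-part value, so the
// sequence is rewritten into two immediate adds, roughly
//   %3:gpr = ADDri %0, 0x1, ...
//   %2:gpr = ADDri %3, 0x10000, ...
// and the MOVi32imm is erased.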
3118 
3119 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3120  const MachineInstr &MI) {
3121  switch (MI.getOpcode()) {
3122  default: {
3123  const MCInstrDesc &Desc = MI.getDesc();
3124  int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3125  assert(UOps >= 0 && "bad # UOps");
3126  return UOps;
3127  }
3128 
3129  case ARM::LDRrs:
3130  case ARM::LDRBrs:
3131  case ARM::STRrs:
3132  case ARM::STRBrs: {
3133  unsigned ShOpVal = MI.getOperand(3).getImm();
3134  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3135  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3136  if (!isSub &&
3137  (ShImm == 0 ||
3138  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3139  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3140  return 1;
3141  return 2;
3142  }
3143 
3144  case ARM::LDRH:
3145  case ARM::STRH: {
3146  if (!MI.getOperand(2).getReg())
3147  return 1;
3148 
3149  unsigned ShOpVal = MI.getOperand(3).getImm();
3150  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3151  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3152  if (!isSub &&
3153  (ShImm == 0 ||
3154  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3155  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3156  return 1;
3157  return 2;
3158  }
3159 
3160  case ARM::LDRSB:
3161  case ARM::LDRSH:
3162  return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3163 
3164  case ARM::LDRSB_POST:
3165  case ARM::LDRSH_POST: {
3166  unsigned Rt = MI.getOperand(0).getReg();
3167  unsigned Rm = MI.getOperand(3).getReg();
3168  return (Rt == Rm) ? 4 : 3;
3169  }
3170 
3171  case ARM::LDR_PRE_REG:
3172  case ARM::LDRB_PRE_REG: {
3173  unsigned Rt = MI.getOperand(0).getReg();
3174  unsigned Rm = MI.getOperand(3).getReg();
3175  if (Rt == Rm)
3176  return 3;
3177  unsigned ShOpVal = MI.getOperand(4).getImm();
3178  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3179  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3180  if (!isSub &&
3181  (ShImm == 0 ||
3182  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3183  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3184  return 2;
3185  return 3;
3186  }
3187 
3188  case ARM::STR_PRE_REG:
3189  case ARM::STRB_PRE_REG: {
3190  unsigned ShOpVal = MI.getOperand(4).getImm();
3191  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3192  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3193  if (!isSub &&
3194  (ShImm == 0 ||
3195  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3196  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3197  return 2;
3198  return 3;
3199  }
3200 
3201  case ARM::LDRH_PRE:
3202  case ARM::STRH_PRE: {
3203  unsigned Rt = MI.getOperand(0).getReg();
3204  unsigned Rm = MI.getOperand(3).getReg();
3205  if (!Rm)
3206  return 2;
3207  if (Rt == Rm)
3208  return 3;
3209  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3210  }
3211 
3212  case ARM::LDR_POST_REG:
3213  case ARM::LDRB_POST_REG:
3214  case ARM::LDRH_POST: {
3215  unsigned Rt = MI.getOperand(0).getReg();
3216  unsigned Rm = MI.getOperand(3).getReg();
3217  return (Rt == Rm) ? 3 : 2;
3218  }
3219 
3220  case ARM::LDR_PRE_IMM:
3221  case ARM::LDRB_PRE_IMM:
3222  case ARM::LDR_POST_IMM:
3223  case ARM::LDRB_POST_IMM:
3224  case ARM::STRB_POST_IMM:
3225  case ARM::STRB_POST_REG:
3226  case ARM::STRB_PRE_IMM:
3227  case ARM::STRH_POST:
3228  case ARM::STR_POST_IMM:
3229  case ARM::STR_POST_REG:
3230  case ARM::STR_PRE_IMM:
3231  return 2;
3232 
3233  case ARM::LDRSB_PRE:
3234  case ARM::LDRSH_PRE: {
3235  unsigned Rm = MI.getOperand(3).getReg();
3236  if (Rm == 0)
3237  return 3;
3238  unsigned Rt = MI.getOperand(0).getReg();
3239  if (Rt == Rm)
3240  return 4;
3241  unsigned ShOpVal = MI.getOperand(4).getImm();
3242  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3243  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3244  if (!isSub &&
3245  (ShImm == 0 ||
3246  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3247  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3248  return 3;
3249  return 4;
3250  }
3251 
3252  case ARM::LDRD: {
3253  unsigned Rt = MI.getOperand(0).getReg();
3254  unsigned Rn = MI.getOperand(2).getReg();
3255  unsigned Rm = MI.getOperand(3).getReg();
3256  if (Rm)
3257  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3258  : 3;
3259  return (Rt == Rn) ? 3 : 2;
3260  }
3261 
3262  case ARM::STRD: {
3263  unsigned Rm = MI.getOperand(3).getReg();
3264  if (Rm)
3265  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3266  : 3;
3267  return 2;
3268  }
3269 
3270  case ARM::LDRD_POST:
3271  case ARM::t2LDRD_POST:
3272  return 3;
3273 
3274  case ARM::STRD_POST:
3275  case ARM::t2STRD_POST:
3276  return 4;
3277 
3278  case ARM::LDRD_PRE: {
3279  unsigned Rt = MI.getOperand(0).getReg();
3280  unsigned Rn = MI.getOperand(3).getReg();
3281  unsigned Rm = MI.getOperand(4).getReg();
3282  if (Rm)
3283  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3284  : 4;
3285  return (Rt == Rn) ? 4 : 3;
3286  }
3287 
3288  case ARM::t2LDRD_PRE: {
3289  unsigned Rt = MI.getOperand(0).getReg();
3290  unsigned Rn = MI.getOperand(3).getReg();
3291  return (Rt == Rn) ? 4 : 3;
3292  }
3293 
3294  case ARM::STRD_PRE: {
3295  unsigned Rm = MI.getOperand(4).getReg();
3296  if (Rm)
3297  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3298  : 4;
3299  return 3;
3300  }
3301 
3302  case ARM::t2STRD_PRE:
3303  return 3;
3304 
3305  case ARM::t2LDR_POST:
3306  case ARM::t2LDRB_POST:
3307  case ARM::t2LDRB_PRE:
3308  case ARM::t2LDRSBi12:
3309  case ARM::t2LDRSBi8:
3310  case ARM::t2LDRSBpci:
3311  case ARM::t2LDRSBs:
3312  case ARM::t2LDRH_POST:
3313  case ARM::t2LDRH_PRE:
3314  case ARM::t2LDRSBT:
3315  case ARM::t2LDRSB_POST:
3316  case ARM::t2LDRSB_PRE:
3317  case ARM::t2LDRSH_POST:
3318  case ARM::t2LDRSH_PRE:
3319  case ARM::t2LDRSHi12:
3320  case ARM::t2LDRSHi8:
3321  case ARM::t2LDRSHpci:
3322  case ARM::t2LDRSHs:
3323  return 2;
3324 
3325  case ARM::t2LDRDi8: {
3326  unsigned Rt = MI.getOperand(0).getReg();
3327  unsigned Rn = MI.getOperand(2).getReg();
3328  return (Rt == Rn) ? 3 : 2;
3329  }
3330 
3331  case ARM::t2STRB_POST:
3332  case ARM::t2STRB_PRE:
3333  case ARM::t2STRBs:
3334  case ARM::t2STRDi8:
3335  case ARM::t2STRH_POST:
3336  case ARM::t2STRH_PRE:
3337  case ARM::t2STRHs:
3338  case ARM::t2STR_POST:
3339  case ARM::t2STR_PRE:
3340  case ARM::t2STRs:
3341  return 2;
3342  }
3343 }
3344 
3345 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3346 // can't be easily determined, return 0 (missing MachineMemOperand).
3347 //
3348 // FIXME: The current MachineInstr design does not support relying on machine
3349 // mem operands to determine the width of a memory access. Instead, we expect
3350 // the target to provide this information based on the instruction opcode and
3351 // operands. However, using MachineMemOperand is the best solution now for
3352 // two reasons:
3353 //
3354 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3355 // operands. This is much more dangerous than using the MachineMemOperand
3356 // sizes because CodeGen passes can insert/remove optional machine operands. In
3357 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3358 // postRA passes as well.
3359 //
3360 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3361 // machine model that calls this should handle the unknown (zero size) case.
3362 //
3363 // Long term, we should require a target hook that verifies MachineMemOperand
3364 // sizes during MC lowering. That target hook should be local to MC lowering
3365 // because we can't ensure that it is aware of other MI forms. Doing this will
3366 // ensure that MachineMemOperands are correctly propagated through all passes.
3367 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3368  unsigned Size = 0;
3369  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3370  E = MI.memoperands_end();
3371  I != E; ++I) {
3372  Size += (*I)->getSize();
3373  }
3374  return Size / 4;
3375 }
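// Illustrative example (not from the original source): an LDMIA that loads
// r4-r7 and carries a single 16-byte MachineMemOperand yields 16 / 4 = 4 here;
// if the memory operands were dropped by an earlier pass, the function returns
// 0 and the scheduling model has to treat the width as unknown.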
3376 
3377 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3378  unsigned NumRegs) {
3379  unsigned UOps = 1 + NumRegs; // 1 for address computation.
3380  switch (Opc) {
3381  default:
3382  break;
3383  case ARM::VLDMDIA_UPD:
3384  case ARM::VLDMDDB_UPD:
3385  case ARM::VLDMSIA_UPD:
3386  case ARM::VLDMSDB_UPD:
3387  case ARM::VSTMDIA_UPD:
3388  case ARM::VSTMDDB_UPD:
3389  case ARM::VSTMSIA_UPD:
3390  case ARM::VSTMSDB_UPD:
3391  case ARM::LDMIA_UPD:
3392  case ARM::LDMDA_UPD:
3393  case ARM::LDMDB_UPD:
3394  case ARM::LDMIB_UPD:
3395  case ARM::STMIA_UPD:
3396  case ARM::STMDA_UPD:
3397  case ARM::STMDB_UPD:
3398  case ARM::STMIB_UPD:
3399  case ARM::tLDMIA_UPD:
3400  case ARM::tSTMIA_UPD:
3401  case ARM::t2LDMIA_UPD:
3402  case ARM::t2LDMDB_UPD:
3403  case ARM::t2STMIA_UPD:
3404  case ARM::t2STMDB_UPD:
3405  ++UOps; // One for base register writeback.
3406  break;
3407  case ARM::LDMIA_RET:
3408  case ARM::tPOP_RET:
3409  case ARM::t2LDMIA_RET:
3410  UOps += 2; // One for base reg wb, one for write to pc.
3411  break;
3412  }
3413  return UOps;
3414 }
3415 
3416 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3417  const MachineInstr &MI) const {
3418  if (!ItinData || ItinData->isEmpty())
3419  return 1;
3420 
3421  const MCInstrDesc &Desc = MI.getDesc();
3422  unsigned Class = Desc.getSchedClass();
3423  int ItinUOps = ItinData->getNumMicroOps(Class);
3424  if (ItinUOps >= 0) {
3425  if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3426  return getNumMicroOpsSwiftLdSt(ItinData, MI);
3427 
3428  return ItinUOps;
3429  }
3430 
3431  unsigned Opc = MI.getOpcode();
3432  switch (Opc) {
3433  default:
3434  llvm_unreachable("Unexpected multi-uops instruction!");
3435  case ARM::VLDMQIA:
3436  case ARM::VSTMQIA:
3437  return 2;
3438 
3439  // The number of uOps for a load / store multiple is determined by the number
3440  // of registers.
3441  //
3442  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3443  // same cycle. The scheduling for the first load / store must be done
3444  // separately by assuming the address is not 64-bit aligned.
3445  //
3446  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3447  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3448  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
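  // For instance, by the Cortex-A9 formula above a VLDMDIA of five D registers
  // with a 64-bit aligned address costs (5 / 2) + (5 % 2) + 1 = 2 + 1 + 1 = 4
  // uops; NumRegs itself is derived below from the difference between the
  // instruction's actual operand count and its static operand count.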
3449  case ARM::VLDMDIA:
3450  case ARM::VLDMDIA_UPD:
3451  case ARM::VLDMDDB_UPD:
3452  case ARM::VLDMSIA:
3453  case ARM::VLDMSIA_UPD:
3454  case ARM::VLDMSDB_UPD:
3455  case ARM::VSTMDIA:
3456  case ARM::VSTMDIA_UPD:
3457  case ARM::VSTMDDB_UPD:
3458  case ARM::VSTMSIA:
3459  case ARM::VSTMSIA_UPD:
3460  case ARM::VSTMSDB_UPD: {
3461  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3462  return (NumRegs / 2) + (NumRegs % 2) + 1;
3463  }
3464 
3465  case ARM::LDMIA_RET:
3466  case ARM::LDMIA:
3467  case ARM::LDMDA:
3468  case ARM::LDMDB:
3469  case ARM::LDMIB:
3470  case ARM::LDMIA_UPD:
3471  case ARM::LDMDA_UPD:
3472  case ARM::LDMDB_UPD:
3473  case ARM::LDMIB_UPD:
3474  case ARM::STMIA:
3475  case ARM::STMDA:
3476  case ARM::STMDB:
3477  case ARM::STMIB:
3478  case ARM::STMIA_UPD:
3479  case ARM::STMDA_UPD:
3480  case ARM::STMDB_UPD:
3481  case ARM::STMIB_UPD:
3482  case ARM::tLDMIA:
3483  case ARM::tLDMIA_UPD:
3484  case ARM::tSTMIA_UPD:
3485  case ARM::tPOP_RET:
3486  case ARM::tPOP:
3487  case ARM::tPUSH:
3488  case ARM::t2LDMIA_RET:
3489  case ARM::t2LDMIA:
3490  case ARM::t2LDMDB:
3491  case ARM::t2LDMIA_UPD:
3492  case ARM::t2LDMDB_UPD:
3493  case ARM::t2STMIA:
3494  case ARM::t2STMDB:
3495  case ARM::t2STMIA_UPD:
3496  case ARM::t2STMDB_UPD: {
3497  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3498  switch (Subtarget.getLdStMultipleTiming()) {
3499  case ARMSubtarget::SingleIssuePlusExtras:
3500  return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3501  case ARMSubtarget::SingleIssue:
3502  // Assume the worst.
3503  return NumRegs;
3504  case ARMSubtarget::DoubleIssue: {
3505  if (NumRegs < 4)
3506  return 2;
3507  // 4 registers would be issued: 2, 2.
3508  // 5 registers would be issued: 2, 2, 1.
3509  unsigned UOps = (NumRegs / 2);
3510  if (NumRegs % 2)
3511  ++UOps;
3512  return UOps;
3513  }
3514  case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3515  unsigned UOps = (NumRegs / 2);
3516  // If there is an odd number of registers or if it's not 64-bit aligned,
3517  // then it takes an extra AGU (Address Generation Unit) cycle.
3518  if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3519  (*MI.memoperands_begin())->getAlignment() < 8)
3520  ++UOps;
3521  return UOps;
3522  }
3523  }
3524  }
3525  }
3526  llvm_unreachable("Didn't find the number of microops");
3527 }
3528 
3529 int
3530 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3531  const MCInstrDesc &DefMCID,
3532  unsigned DefClass,
3533  unsigned DefIdx, unsigned DefAlign) const {
3534  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3535  if (RegNo <= 0)
3536  // Def is the address writeback.
3537  return ItinData->getOperandCycle(DefClass, DefIdx);
3538 
3539  int DefCycle;
3540  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3541  // (regno / 2) + (regno % 2) + 1
3542  DefCycle = RegNo / 2 + 1;
3543  if (RegNo % 2)
3544  ++DefCycle;
3545  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3546  DefCycle = RegNo;
3547  bool isSLoad = false;
3548 
3549  switch (DefMCID.getOpcode()) {
3550  default: break;
3551  case ARM::VLDMSIA:
3552  case ARM::VLDMSIA_UPD:
3553  case ARM::VLDMSDB_UPD:
3554  isSLoad = true;
3555  break;
3556  }
3557 
3558  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3559  // then it takes an extra cycle.
3560  if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3561  ++DefCycle;
3562  } else {
3563  // Assume the worst.
3564  DefCycle = RegNo + 2;
3565  }
3566 
3567  return DefCycle;
3568 }
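// Illustrative numbers for getVLDMDefCycle: for the def of the third D
// register in the list (RegNo == 3), Cortex-A8/A7 gives 3/2 + 1 = 2 plus one
// for the odd index, i.e. cycle 3; Cortex-A9/Swift starts from RegNo == 3 and
// only adds the extra cycle for an odd S-register count or sub-64-bit
// alignment; any other core falls back to the pessimistic RegNo + 2 = 5.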
3569 
3571  unsigned BaseReg = MI.getOperand(0).getReg();
3572  for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
3573  const auto &Op = MI.getOperand(i);
3574  if (Op.isReg() && Op.getReg() == BaseReg)
3575  return true;
3576  }
3577  return false;
3578 }
3579 unsigned
3581  // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops
3582  // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops)
3583  return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
3584 }
3585 
3586 int
3587 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3588  const MCInstrDesc &DefMCID,
3589  unsigned DefClass,
3590  unsigned DefIdx, unsigned DefAlign) const {
3591  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3592  if (RegNo <= 0)
3593  // Def is the address writeback.
3594  return ItinData->getOperandCycle(DefClass, DefIdx);
3595 
3596  int DefCycle;
3597  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3598  // 4 registers would be issued: 1, 2, 1.
3599  // 5 registers would be issued: 1, 2, 2.
3600  DefCycle = RegNo / 2;
3601  if (DefCycle < 1)
3602  DefCycle = 1;
3603  // Result latency is issue cycle + 2: E2.
3604  DefCycle += 2;
3605  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3606  DefCycle = (RegNo / 2);
3607  // If there is an odd number of registers or if it's not 64-bit aligned,
3608  // then it takes an extra AGU (Address Generation Unit) cycle.
3609  if ((RegNo % 2) || DefAlign < 8)
3610  ++DefCycle;
3611  // Result latency is AGU cycles + 2.
3612  DefCycle += 2;
3613  } else {
3614  // Assume the worst.
3615  DefCycle = RegNo + 2;
3616  }
3617 
3618  return DefCycle;
3619 }
3620 
3621 int
3622 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3623  const MCInstrDesc &UseMCID,
3624  unsigned UseClass,
3625  unsigned UseIdx, unsigned UseAlign) const {
3626  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3627  if (RegNo <= 0)
3628  return ItinData->getOperandCycle(UseClass, UseIdx);
3629 
3630  int UseCycle;
3631  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3632  // (regno / 2) + (regno % 2) + 1
3633  UseCycle = RegNo / 2 + 1;
3634  if (RegNo % 2)
3635  ++UseCycle;
3636  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3637  UseCycle = RegNo;
3638  bool isSStore = false;
3639 
3640  switch (UseMCID.getOpcode()) {
3641  default: break;
3642  case ARM::VSTMSIA:
3643  case ARM::VSTMSIA_UPD:
3644  case ARM::VSTMSDB_UPD:
3645  isSStore = true;
3646  break;
3647  }
3648 
3649  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3650  // then it takes an extra cycle.
3651  if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3652  ++UseCycle;
3653  } else {
3654  // Assume the worst.
3655  UseCycle = RegNo + 2;
3656  }
3657 
3658  return UseCycle;
3659 }
3660 
3661 int
3662 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3663  const MCInstrDesc &UseMCID,
3664  unsigned UseClass,
3665  unsigned UseIdx, unsigned UseAlign) const {
3666  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3667  if (RegNo <= 0)
3668  return ItinData->getOperandCycle(UseClass, UseIdx);
3669 
3670  int UseCycle;
3671  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3672  UseCycle = RegNo / 2;
3673  if (UseCycle < 2)
3674  UseCycle = 2;
3675  // Read in E3.
3676  UseCycle += 2;
3677  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3678  UseCycle = (RegNo / 2);
3679  // If there is an odd number of registers or if it's not 64-bit aligned,
3680  // then it takes an extra AGU (Address Generation Unit) cycle.
3681  if ((RegNo % 2) || UseAlign < 8)
3682  ++UseCycle;
3683  } else {
3684  // Assume the worst.
3685  UseCycle = 1;
3686  }
3687  return UseCycle;
3688 }
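// The use-cycle helpers mirror the def-cycle ones. As an example, for the
// third register stored by an STM on Cortex-A8/A7, getSTMUseCycle computes
// max(3/2, 2) + 2 = 4 (the value is read in E3), while on Cortex-A9/Swift it
// computes 3/2 = 1 plus one extra AGU cycle because the register count is
// odd, giving 2.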
3689 
3690 int
3691 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3692  const MCInstrDesc &DefMCID,
3693  unsigned DefIdx, unsigned DefAlign,
3694  const MCInstrDesc &UseMCID,
3695  unsigned UseIdx, unsigned UseAlign) const {
3696  unsigned DefClass = DefMCID.getSchedClass();
3697  unsigned UseClass = UseMCID.getSchedClass();
3698 
3699  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3700  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3701 
3702  // This may be a def / use of a variable_ops instruction, in which case the
3703  // operand latency might be determinable dynamically. Let the target try to
3704  // figure it out.
3705  int DefCycle = -1;
3706  bool LdmBypass = false;
3707  switch (DefMCID.getOpcode()) {
3708  default:
3709  DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3710  break;
3711 
3712  case ARM::VLDMDIA:
3713  case ARM::VLDMDIA_UPD:
3714  case ARM::VLDMDDB_UPD:
3715  case ARM::VLDMSIA:
3716  case ARM::VLDMSIA_UPD:
3717  case ARM::VLDMSDB_UPD:
3718  DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3719  break;
3720 
3721  case ARM::LDMIA_RET:
3722  case ARM::LDMIA:
3723  case ARM::LDMDA:
3724  case ARM::LDMDB:
3725  case ARM::LDMIB:
3726  case ARM::LDMIA_UPD:
3727  case ARM::LDMDA_UPD:
3728  case ARM::LDMDB_UPD:
3729  case ARM::LDMIB_UPD:
3730  case ARM::tLDMIA:
3731  case ARM::tLDMIA_UPD:
3732  case ARM::tPUSH:
3733  case ARM::t2LDMIA_RET:
3734  case ARM::t2LDMIA:
3735  case ARM::t2LDMDB:
3736  case ARM::t2LDMIA_UPD:
3737  case ARM::t2LDMDB_UPD:
3738  LdmBypass = true;
3739  DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3740  break;
3741  }
3742 
3743  if (DefCycle == -1)
3744  // We can't seem to determine the result latency of the def, assume it's 2.
3745  DefCycle = 2;
3746 
3747  int UseCycle = -1;
3748  switch (UseMCID.getOpcode()) {
3749  default:
3750  UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3751  break;
3752 
3753  case ARM::VSTMDIA:
3754  case ARM::VSTMDIA_UPD:
3755  case ARM::VSTMDDB_UPD:
3756  case ARM::VSTMSIA:
3757  case ARM::VSTMSIA_UPD:
3758  case ARM::VSTMSDB_UPD:
3759  UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3760  break;
3761 
3762  case ARM::STMIA:
3763  case ARM::STMDA:
3764  case ARM::STMDB:
3765  case ARM::STMIB:
3766  case ARM::STMIA_UPD:
3767  case ARM::STMDA_UPD:
3768  case ARM::STMDB_UPD:
3769  case ARM::STMIB_UPD:
3770  case ARM::tSTMIA_UPD:
3771  case ARM::tPOP_RET:
3772  case ARM::tPOP:
3773  case ARM::t2STMIA:
3774  case ARM::t2STMDB:
3775  case ARM::t2STMIA_UPD:
3776  case ARM::t2STMDB_UPD:
3777  UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3778  break;
3779  }
3780 
3781  if (UseCycle == -1)
3782  // Assume it's read in the first stage.
3783  UseCycle = 1;
3784 
3785  UseCycle = DefCycle - UseCycle + 1;
3786  if (UseCycle > 0) {
3787  if (LdmBypass) {
3788  // It's a variable_ops instruction so we can't use DefIdx here. Just use
3789  // first def operand.
3790  if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3791  UseClass, UseIdx))
3792  --UseCycle;
3793  } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3794  UseClass, UseIdx)) {
3795  --UseCycle;
3796  }
3797  }
3798 
3799  return UseCycle;
3800 }
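// Putting the two halves together: if, say, an LDM's def becomes available in
// DefCycle 4 and the consuming STM reads its operand in UseCycle 2, the
// operand latency computed above is 4 - 2 + 1 = 3 cycles, reduced to 2 when
// the itinerary records a pipeline forward between the two scheduling
// classes.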
3801 
3802 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3803  const MachineInstr *MI, unsigned Reg,
3804  unsigned &DefIdx, unsigned &Dist) {
3805  Dist = 0;
3806 
3807  MachineBasicBlock::const_iterator I = MI; ++I;
3808  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
3809  assert(II->isInsideBundle() && "Empty bundle?");
3810 
3811  int Idx = -1;
3812  while (II->isInsideBundle()) {
3813  Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3814  if (Idx != -1)
3815  break;
3816  --II;
3817  ++Dist;
3818  }
3819 
3820  assert(Idx != -1 && "Cannot find bundled definition!");
3821  DefIdx = Idx;
3822  return &*II;
3823 }
3824 
3825 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3826  const MachineInstr &MI, unsigned Reg,
3827  unsigned &UseIdx, unsigned &Dist) {
3828  Dist = 0;
3829 
3830  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
3831  assert(II->isInsideBundle() && "Empty bundle?");
3832  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
3833 
3834  // FIXME: This doesn't properly handle multiple uses.
3835  int Idx = -1;
3836  while (II != E && II->isInsideBundle()) {
3837  Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3838  if (Idx != -1)
3839  break;
3840  if (II->getOpcode() != ARM::t2IT)
3841  ++Dist;
3842  ++II;
3843  }
3844 
3845  if (Idx == -1) {
3846  Dist = 0;
3847  return nullptr;
3848  }
3849 
3850  UseIdx = Idx;
3851  return &*II;
3852 }
3853 
3854 /// Return the number of cycles to add to (or subtract from) the static
3855 /// itinerary based on the def opcode and alignment. The caller will ensure that
3856 /// adjusted latency is at least one cycle.
3857 static int adjustDefLatency(const ARMSubtarget &Subtarget,
3858  const MachineInstr &DefMI,
3859  const MCInstrDesc &DefMCID, unsigned DefAlign) {
3860  int Adjust = 0;
3861  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
3862  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3863  // variants are one cycle cheaper.
3864  switch (DefMCID.getOpcode()) {
3865  default: break;
3866  case ARM::LDRrs:
3867  case ARM::LDRBrs: {
3868  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3869  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3870  if (ShImm == 0 ||
3871  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3872  --Adjust;
3873  break;
3874  }
3875  case ARM::t2LDRs:
3876  case ARM::t2LDRBs:
3877  case ARM::t2LDRHs:
3878  case ARM::t2LDRSHs: {
3879  // Thumb2 mode: lsl only.
3880  unsigned ShAmt = DefMI.getOperand(3).getImm();
3881  if (ShAmt == 0 || ShAmt == 2)
3882  --Adjust;
3883  break;
3884  }
3885  }
3886  } else if (Subtarget.isSwift()) {
3887  // FIXME: Properly handle all of the latency adjustments for address
3888  // writeback.
3889  switch (DefMCID.getOpcode()) {
3890  default: break;
3891  case ARM::LDRrs:
3892  case ARM::LDRBrs: {
3893  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3894  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3895  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3896  if (!isSub &&
3897  (ShImm == 0 ||
3898  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3899  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3900  Adjust -= 2;
3901  else if (!isSub &&
3902  ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3903  --Adjust;
3904  break;
3905  }
3906  case ARM::t2LDRs:
3907  case ARM::t2LDRBs:
3908  case ARM::t2LDRHs:
3909  case ARM::t2LDRSHs: {
3910  // Thumb2 mode: lsl only.
3911  unsigned ShAmt = DefMI.getOperand(3).getImm();
3912  if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
3913  Adjust -= 2;
3914  break;
3915  }
3916  }
3917  }
3918 
3919  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
3920  switch (DefMCID.getOpcode()) {
3921  default: break;
3922  case ARM::VLD1q8:
3923  case ARM::VLD1q16:
3924  case ARM::VLD1q32:
3925  case ARM::VLD1q64:
3926  case ARM::VLD1q8wb_fixed:
3927  case ARM::VLD1q16wb_fixed:
3928  case ARM::VLD1q32wb_fixed:
3929  case ARM::VLD1q64wb_fixed:
3930  case ARM::VLD1q8wb_register:
3931  case ARM::VLD1q16wb_register:
3932  case ARM::VLD1q32wb_register:
3933  case ARM::VLD1q64wb_register:
3934  case ARM::VLD2d8:
3935  case ARM::VLD2d16:
3936  case ARM::VLD2d32:
3937  case ARM::VLD2q8:
3938  case ARM::VLD2q16:
3939  case ARM::VLD2q32:
3940  case ARM::VLD2d8wb_fixed:
3941  case ARM::VLD2d16wb_fixed:
3942  case ARM::VLD2d32wb_fixed:
3943  case ARM::VLD2q8wb_fixed:
3944  case ARM::VLD2q16wb_fixed:
3945  case ARM::VLD2q32wb_fixed:
3946  case ARM::VLD2d8wb_register:
3947  case ARM::VLD2d16wb_register:
3948  case ARM::VLD2d32wb_register:
3949  case ARM::VLD2q8wb_register:
3950  case ARM::VLD2q16wb_register:
3951  case ARM::VLD2q32wb_register:
3952  case ARM::VLD3d8:
3953  case ARM::VLD3d16:
3954  case ARM::VLD3d32:
3955  case ARM::VLD1d64T:
3956  case ARM::VLD3d8_UPD:
3957  case ARM::VLD3d16_UPD:
3958  case ARM::VLD3d32_UPD:
3959  case ARM::VLD1d64Twb_fixed:
3960  case ARM::VLD1d64Twb_register:
3961  case ARM::VLD3q8_UPD:
3962  case ARM::VLD3q16_UPD:
3963  case ARM::VLD3q32_UPD:
3964  case ARM::VLD4d8:
3965  case ARM::VLD4d16:
3966  case ARM::VLD4d32:
3967  case ARM::VLD1d64Q:
3968  case ARM::VLD4d8_UPD:
3969  case ARM::VLD4d16_UPD:
3970  case ARM::VLD4d32_UPD:
3971  case ARM::VLD1d64Qwb_fixed:
3972  case ARM::VLD1d64Qwb_register:
3973  case ARM::VLD4q8_UPD:
3974  case ARM::VLD4q16_UPD:
3975  case ARM::VLD4q32_UPD:
3976  case ARM::VLD1DUPq8:
3977  case ARM::VLD1DUPq16:
3978  case ARM::VLD1DUPq32:
3979  case ARM::VLD1DUPq8wb_fixed:
3980  case ARM::VLD1DUPq16wb_fixed:
3981  case ARM::VLD1DUPq32wb_fixed:
3982  case ARM::VLD1DUPq8wb_register:
3983  case ARM::VLD1DUPq16wb_register:
3984  case ARM::VLD1DUPq32wb_register:
3985  case ARM::VLD2DUPd8:
3986  case ARM::VLD2DUPd16:
3987  case ARM::VLD2DUPd32:
3988  case ARM::VLD2DUPd8wb_fixed:
3989  case ARM::VLD2DUPd16wb_fixed:
3990  case ARM::VLD2DUPd32wb_fixed:
3991  case ARM::VLD2DUPd8wb_register:
3992  case ARM::VLD2DUPd16wb_register:
3993  case ARM::VLD2DUPd32wb_register:
3994  case ARM::VLD4DUPd8:
3995  case ARM::VLD4DUPd16:
3996  case ARM::VLD4DUPd32:
3997  case ARM::VLD4DUPd8_UPD:
3998  case ARM::VLD4DUPd16_UPD:
3999  case ARM::VLD4DUPd32_UPD:
4000  case ARM::VLD1LNd8:
4001  case ARM::VLD1LNd16:
4002  case ARM::VLD1LNd32:
4003  case ARM::VLD1LNd8_UPD:
4004  case ARM::VLD1LNd16_UPD:
4005  case ARM::VLD1LNd32_UPD:
4006  case ARM::VLD2LNd8:
4007  case ARM::VLD2LNd16:
4008  case ARM::VLD2LNd32:
4009  case ARM::VLD2LNq16:
4010  case ARM::VLD2LNq32:
4011  case ARM::VLD2LNd8_UPD:
4012  case ARM::VLD2LNd16_UPD:
4013  case ARM::VLD2LNd32_UPD:
4014  case ARM::VLD2LNq16_UPD:
4015  case ARM::VLD2LNq32_UPD:
4016  case ARM::VLD4LNd8:
4017  case ARM::VLD4LNd16:
4018  case ARM::VLD4LNd32:
4019  case ARM::VLD4LNq16:
4020  case ARM::VLD4LNq32:
4021  case ARM::VLD4LNd8_UPD:
4022  case ARM::VLD4LNd16_UPD:
4023  case ARM::VLD4LNd32_UPD:
4024  case ARM::VLD4LNq16_UPD:
4025  case ARM::VLD4LNq32_UPD:
4026  // If the address is not 64-bit aligned, the latencies of these
4027  // instructions increase by one.
4028  ++Adjust;
4029  break;
4030  }
4031  }
4032  return Adjust;
4033 }
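// Example adjustments produced by adjustDefLatency: an LDRrs whose address is
// [r0, r1, lsl #2] gets -1 on Cortex-A8/A9-like cores and -2 on Swift, while
// a VLD2d8 whose memory operand is only 4-byte aligned gets +1 on cores that
// check VLDn access alignment. The callers only apply a negative adjustment
// when the result stays positive; otherwise they keep the unadjusted
// itinerary latency.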
4034 
4035 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4036  const MachineInstr &DefMI,
4037  unsigned DefIdx,
4038  const MachineInstr &UseMI,
4039  unsigned UseIdx) const {
4040  // No operand latency. The caller may fall back to getInstrLatency.
4041  if (!ItinData || ItinData->isEmpty())
4042  return -1;
4043 
4044  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4045  unsigned Reg = DefMO.getReg();
4046 
4047  const MachineInstr *ResolvedDefMI = &DefMI;
4048  unsigned DefAdj = 0;
4049  if (DefMI.isBundle())
4050  ResolvedDefMI =
4051  getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4052  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4053  ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4054  return 1;
4055  }
4056 
4057  const MachineInstr *ResolvedUseMI = &UseMI;
4058  unsigned UseAdj = 0;
4059  if (UseMI.isBundle()) {
4060  ResolvedUseMI =
4061  getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4062  if (!ResolvedUseMI)
4063  return -1;
4064  }
4065 
4066  return getOperandLatencyImpl(
4067  ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4068  Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4069 }
4070 
4071 int ARMBaseInstrInfo::getOperandLatencyImpl(
4072  const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4073  unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4074  const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4075  unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4076  if (Reg == ARM::CPSR) {
4077  if (DefMI.getOpcode() == ARM::FMSTAT) {
4078  // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4079  return Subtarget.isLikeA9() ? 1 : 20;
4080  }
4081 
4082  // CPSR set and branch can be paired in the same cycle.
4083  if (UseMI.isBranch())
4084  return 0;
4085 
4086  // Otherwise it takes the instruction latency (generally one).
4087  unsigned Latency = getInstrLatency(ItinData, DefMI);
4088 
4089  // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4090  // its uses. Instructions which are otherwise scheduled between them may
4091  // incur a code size penalty (not able to use the CPSR setting 16-bit
4092  // instructions).
4093  if (Latency > 0 && Subtarget.isThumb2()) {
4094  const MachineFunction *MF = DefMI.getParent()->getParent();
4095  // FIXME: Use Function::optForSize().
4096  if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4097  --Latency;
4098  }
4099  return Latency;
4100  }
4101 
4102  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4103  return -1;
4104 
4105  unsigned DefAlign = DefMI.hasOneMemOperand()
4106  ? (*DefMI.memoperands_begin())->getAlignment()
4107  : 0;
4108  unsigned UseAlign = UseMI.hasOneMemOperand()
4109  ? (*UseMI.memoperands_begin())->getAlignment()
4110  : 0;
4111 
4112  // Get the itinerary's latency if possible, and handle variable_ops.
4113  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
4114  UseIdx, UseAlign);
4115  // Unable to find operand latency. The caller may resort to getInstrLatency.
4116  if (Latency < 0)
4117  return Latency;
4118 
4119  // Adjust for IT block position.
4120  int Adj = DefAdj + UseAdj;
4121 
4122  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4123  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4124  if (Adj >= 0 || (int)Latency > -Adj) {
4125  return Latency + Adj;
4126  }
4127  // Return the itinerary latency, which may be zero but not less than zero.
4128  return Latency;
4129 }
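// Concrete cases of the CPSR handling above: an FMSTAT def feeding a CPSR
// read is priced at 20 cycles on cores that are not A9-like (e.g. Cortex-A8)
// but only 1 otherwise; a flag-setting instruction whose CPSR def is consumed
// by a branch reports latency 0 since the pair can issue in the same cycle;
// and in Thumb2 functions marked for size the latency is trimmed by one so
// the scheduler keeps flag setters next to their users.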
4130 
4131 int
4132 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4133  SDNode *DefNode, unsigned DefIdx,
4134  SDNode *UseNode, unsigned UseIdx) const {
4135  if (!DefNode->isMachineOpcode())
4136  return 1;
4137 
4138  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4139 
4140  if (isZeroCost(DefMCID.Opcode))
4141  return 0;
4142 
4143  if (!ItinData || ItinData->isEmpty())
4144  return DefMCID.mayLoad() ? 3 : 1;
4145 
4146  if (!UseNode->isMachineOpcode()) {
4147  int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4148  int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4149  int Threshold = 1 + Adj;
4150  return Latency <= Threshold ? 1 : Latency - Adj;
4151  }
4152 
4153  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4154  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
4155  unsigned DefAlign = !DefMN->memoperands_empty()
4156  ? (*DefMN->memoperands_begin())->getAlignment() : 0;
4157  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
4158  unsigned UseAlign = !UseMN->memoperands_empty()
4159  ? (*UseMN->memoperands_begin())->getAlignment() : 0;
4160  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4161  UseMCID, UseIdx, UseAlign);
4162 
4163  if (Latency > 1 &&
4164  (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4165  Subtarget.isCortexA7())) {
4166  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4167  // variants are one cycle cheaper.
4168  switch (DefMCID.getOpcode()) {
4169  default: break;
4170  case ARM::LDRrs:
4171  case ARM::LDRBrs: {
4172  unsigned ShOpVal =
4173  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4174  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4175  if (ShImm == 0 ||
4176  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4177  --Latency;
4178  break;
4179  }
4180  case ARM::t2LDRs:
4181  case ARM::t2LDRBs:
4182  case ARM::t2LDRHs:
4183  case ARM::t2LDRSHs: {
4184  // Thumb2 mode: lsl only.
4185  unsigned ShAmt =
4186  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4187  if (ShAmt == 0 || ShAmt == 2)
4188  --Latency;
4189  break;
4190  }
4191  }
4192  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4193  // FIXME: Properly handle all of the latency adjustments for address
4194  // writeback.
4195  switch (DefMCID.getOpcode()) {
4196  default: break;
4197  case ARM::LDRrs:
4198  case ARM::LDRBrs: {
4199  unsigned ShOpVal =
4200  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4201  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4202  if (ShImm == 0 ||
4203  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4204  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4205  Latency -= 2;
4206  else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4207  --Latency;
4208  break;
4209  }
4210  case ARM::t2LDRs:
4211  case ARM::t2LDRBs:
4212  case ARM::t2LDRHs:
4213  case ARM::t2LDRSHs:
4214  // Thumb2 mode: lsl 0-3 only.
4215  Latency -= 2;
4216  break;
4217  }
4218  }
4219 
4220  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4221  switch (DefMCID.getOpcode()) {
4222  default: break;
4223  case ARM::VLD1q8:
4224  case ARM::VLD1q16:
4225  case ARM::VLD1q32:
4226  case ARM::VLD1q64:
4227  case ARM::VLD1q8wb_register:
4228  case ARM::VLD1q16wb_register:
4229  case ARM::VLD1q32wb_register:
4230  case ARM::VLD1q64wb_register:
4231  case ARM::VLD1q8wb_fixed:
4232  case ARM::VLD1q16wb_fixed:
4233  case ARM::VLD1q32wb_fixed:
4234  case ARM::VLD1q64wb_fixed:
4235  case ARM::VLD2d8:
4236  case ARM::VLD2d16:
4237  case ARM::VLD2d32:
4238  case ARM::VLD2q8Pseudo:
4239  case ARM::VLD2q16Pseudo:
4240  case ARM::VLD2q32Pseudo:
4241  case ARM::VLD2d8wb_fixed:
4242  case ARM::VLD2d16wb_fixed:
4243  case ARM::VLD2d32wb_fixed:
4244  case ARM::VLD2q8PseudoWB_fixed:
4245  case ARM::VLD2q16PseudoWB_fixed:
4246  case ARM::VLD2q32PseudoWB_fixed:
4247  case ARM::VLD2d8wb_register:
4248  case ARM::VLD2d16wb_register:
4249  case ARM::VLD2d32wb_register:
4250  case ARM::VLD2q8PseudoWB_register:
4251  case ARM::VLD2q16PseudoWB_register:
4252  case ARM::VLD2q32PseudoWB_register:
4253  case ARM::VLD3d8Pseudo:
4254  case ARM::VLD3d16Pseudo:
4255  case ARM::VLD3d32Pseudo:
4256  case ARM::VLD1d8TPseudo:
4257  case ARM::VLD1d16TPseudo:
4258  case ARM::VLD1d32TPseudo:
4259  case ARM::VLD1d64TPseudo:
4260  case ARM::VLD1d64TPseudoWB_fixed:
4261  case ARM::VLD1d64TPseudoWB_register:
4262  case ARM::VLD3d8Pseudo_UPD:
4263  case ARM::VLD3d16Pseudo_UPD:
4264  case ARM::VLD3d32Pseudo_UPD:
4265  case ARM::VLD3q8Pseudo_UPD:
4266  case ARM::VLD3q16Pseudo_UPD:
4267  case ARM::VLD3q32Pseudo_UPD:
4268  case ARM::VLD3q8oddPseudo:
4269  case ARM::VLD3q16oddPseudo:
4270  case ARM::VLD3q32oddPseudo:
4271  case ARM::VLD3q8oddPseudo_UPD:
4272  case ARM::VLD3q16oddPseudo_UPD:
4273  case ARM::VLD3q32oddPseudo_UPD:
4274  case ARM::VLD4d8Pseudo:
4275  case ARM::VLD4d16Pseudo:
4276  case ARM::VLD4d32Pseudo:
4277  case ARM::VLD1d8QPseudo:
4278  case ARM::VLD1d16QPseudo:
4279  case ARM::VLD1d32QPseudo:
4280  case ARM::VLD1d64QPseudo:
4281  case ARM::VLD1d64QPseudoWB_fixed:
4282  case ARM::VLD1d64QPseudoWB_register:
4283  case ARM::VLD1q8HighQPseudo:
4284  case ARM::VLD1q8LowQPseudo_UPD:
4285  case ARM::VLD1q8HighTPseudo:
4286  case ARM::VLD1q8LowTPseudo_UPD:
4287  case ARM::VLD1q16HighQPseudo:
4288  case ARM::VLD1q16LowQPseudo_UPD:
4289  case ARM::VLD1q16HighTPseudo:
4290  case ARM::VLD1q16LowTPseudo_UPD:
4291  case ARM::VLD1q32HighQPseudo:
4292  case ARM::VLD1q32LowQPseudo_UPD:
4293  case ARM::VLD1q32HighTPseudo:
4294  case ARM::VLD1q32LowTPseudo_UPD:
4295  case ARM::VLD1q64HighQPseudo:
4296  case ARM::VLD1q64LowQPseudo_UPD:
4297  case ARM::VLD1q64HighTPseudo:
4298  case ARM::VLD1q64LowTPseudo_UPD:
4299  case ARM::VLD4d8Pseudo_UPD:
4300  case ARM::VLD4d16Pseudo_UPD:
4301  case ARM::VLD4d32Pseudo_UPD:
4302  case ARM::VLD4q8Pseudo_UPD:
4303  case ARM::VLD4q16Pseudo_UPD:
4304  case ARM::VLD4q32Pseudo_UPD:
4305  case ARM::VLD4q8oddPseudo:
4306  case ARM::VLD4q16oddPseudo:
4307  case ARM::VLD4q32oddPseudo:
4308  case ARM::VLD4q8oddPseudo_UPD:
4309  case ARM::VLD4q16oddPseudo_UPD:
4310  case ARM::VLD4q32oddPseudo_UPD:
4311  case ARM::VLD1DUPq8:
4312  case ARM::VLD1DUPq16:
4313  case ARM::VLD1DUPq32:
4314  case ARM::VLD1DUPq8wb_fixed:
4315  case ARM::VLD1DUPq16wb_fixed:
4316  case ARM::VLD1DUPq32wb_fixed:
4317  case ARM::VLD1DUPq8wb_register:
4318  case ARM::VLD1DUPq16wb_register:
4319  case ARM::VLD1DUPq32wb_register:
4320  case ARM::VLD2DUPd8:
4321  case ARM::VLD2DUPd16:
4322  case ARM::VLD2DUPd32:
4323  case ARM::VLD2DUPd8wb_fixed:
4324  case ARM::VLD2DUPd16wb_fixed:
4325  case ARM::VLD2DUPd32wb_fixed:
4326  case ARM::VLD2DUPd8wb_register:
4327  case ARM::VLD2DUPd16wb_register:
4328  case ARM::VLD2DUPd32wb_register:
4329  case ARM::VLD2DUPq8EvenPseudo:
4330  case ARM::VLD2DUPq8OddPseudo:
4331  case ARM::VLD2DUPq16EvenPseudo:
4332  case ARM::VLD2DUPq16OddPseudo:
4333  case ARM::VLD2DUPq32EvenPseudo:
4334  case ARM::VLD2DUPq32OddPseudo:
4335  case ARM::VLD3DUPq8EvenPseudo:
4336  case ARM::VLD3DUPq8OddPseudo:
4337  case ARM::VLD3DUPq16EvenPseudo:
4338  case ARM::VLD3DUPq16OddPseudo:
4339  case ARM::VLD3DUPq32EvenPseudo:
4340  case ARM::VLD3DUPq32OddPseudo:
4341  case ARM::VLD4DUPd8Pseudo:
4342  case ARM::VLD4DUPd16Pseudo:
4343  case ARM::VLD4DUPd32Pseudo:
4344  case ARM::VLD4DUPd8Pseudo_UPD:
4345  case ARM::VLD4DUPd16Pseudo_UPD:
4346  case ARM::VLD4DUPd32Pseudo_UPD:
4347  case ARM::VLD4DUPq8EvenPseudo:
4348  case ARM::VLD4DUPq8OddPseudo:
4349  case ARM::VLD4DUPq16EvenPseudo:
4350  case ARM::VLD4DUPq16OddPseudo:
4351  case ARM::VLD4DUPq32EvenPseudo:
4352  case ARM::VLD4DUPq32OddPseudo:
4353  case ARM::VLD1LNq8Pseudo:
4354  case ARM::VLD1LNq16Pseudo:
4355  case ARM::VLD1LNq32Pseudo:
4356  case ARM::VLD1LNq8Pseudo_UPD:
4357  case ARM::VLD1LNq16Pseudo_UPD:
4358  case ARM::VLD1LNq32Pseudo_UPD:
4359  case ARM::VLD2LNd8Pseudo:
4360  case ARM::VLD2LNd16Pseudo:
4361  case ARM::VLD2LNd32Pseudo:
4362  case ARM::VLD2LNq16Pseudo:
4363  case ARM::VLD2LNq32Pseudo:
4364  case ARM::VLD2LNd8Pseudo_UPD:
4365  case ARM::VLD2LNd16Pseudo_UPD:
4366  case ARM::VLD2LNd32Pseudo_UPD:
4367  case ARM::VLD2LNq16Pseudo_UPD:
4368  case ARM::VLD2LNq32Pseudo_UPD:
4369  case ARM::VLD4LNd8Pseudo:
4370  case ARM::VLD4LNd16Pseudo:
4371  case ARM::VLD4LNd32Pseudo:
4372  case ARM::VLD4LNq16Pseudo:
4373  case ARM::VLD4LNq32Pseudo:
4374  case ARM::VLD4LNd8Pseudo_UPD:
4375  case ARM::VLD4LNd16Pseudo_UPD:
4376  case ARM::VLD4LNd32Pseudo_UPD:
4377  case ARM::VLD4LNq16Pseudo_UPD:
4378  case ARM::VLD4LNq32Pseudo_UPD:
4379  // If the address is not 64-bit aligned, the latencies of these
4380  // instructions increase by one.
4381  ++Latency;
4382  break;
4383  }
4384 
4385  return Latency;
4386 }
4387 
4388 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4389  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4390  MI.isImplicitDef())
4391  return 0;
4392 
4393  if (MI.isBundle())
4394  return 0;
4395 
4396  const MCInstrDesc &MCID = MI.getDesc();
4397 
4398  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4399  !Subtarget.cheapPredicableCPSRDef())) {
4400  // When predicated, CPSR is an additional source operand for CPSR updating
4401  // instructions; this apparently increases their latencies.
4402  return 1;
4403  }
4404  return 0;
4405 }
4406 
4407 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4408  const MachineInstr &MI,
4409  unsigned *PredCost) const {
4410  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4411  MI.isImplicitDef())
4412  return 1;
4413 
4414  // An instruction scheduler typically runs on unbundled instructions, however
4415  // other passes may query the latency of a bundled instruction.
4416  if (MI.isBundle()) {
4417  unsigned Latency = 0;
4418  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4419  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4420  while (++I != E && I->isInsideBundle()) {
4421  if (I->getOpcode() != ARM::t2IT)
4422  Latency += getInstrLatency(ItinData, *I, PredCost);
4423  }
4424  return Latency;
4425  }
4426 
4427  const MCInstrDesc &MCID = MI.getDesc();
4428  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4429  !Subtarget.cheapPredicableCPSRDef()))) {
4430  // When predicated, CPSR is an additional source operand for CPSR updating
4431  // instructions; this apparently increases their latencies.
4432  *PredCost = 1;
4433  }
4434  // Be sure to call getStageLatency for an empty itinerary in case it has a
4435  // valid MinLatency property.
4436  if (!ItinData)
4437  return MI.mayLoad() ? 3 : 1;
4438 
4439  unsigned Class = MCID.getSchedClass();
4440 
4441  // For instructions with variable uops, use uops as latency.
4442  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4443  return getNumMicroOps(ItinData, MI);
4444 
4445  // For the common case, fall back on the itinerary's latency.
4446  unsigned Latency = ItinData->getStageLatency(Class);
4447 
4448  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4449  unsigned DefAlign =
4450  MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
4451  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4452  if (Adj >= 0 || (int)Latency > -Adj) {
4453  return Latency + Adj;
4454  }
4455  return Latency;
4456 }
4457 
4458 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4459  SDNode *Node) const {
4460  if (!Node->isMachineOpcode())
4461  return 1;
4462 
4463  if (!ItinData || ItinData->isEmpty())
4464  return 1;
4465 
4466  unsigned Opcode = Node->getMachineOpcode();
4467  switch (Opcode) {
4468  default:
4469  return ItinData->getStageLatency(get(Opcode).getSchedClass());
4470  case ARM::VLDMQIA:
4471  case ARM::VSTMQIA:
4472  return 2;
4473  }
4474 }
4475 
4476 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4477  const MachineRegisterInfo *MRI,
4478  const MachineInstr &DefMI,
4479  unsigned DefIdx,
4480  const MachineInstr &UseMI,
4481  unsigned UseIdx) const {
4482  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4483  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4484  if (Subtarget.nonpipelinedVFP() &&
4485  (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4486  return true;
4487 
4488  // Hoist VFP / NEON instructions with 4 or higher latency.
4489  unsigned Latency =
4490  SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4491  if (Latency <= 3)
4492  return false;
4493  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4494  UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4495 }
4496 
4497 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4498  const MachineInstr &DefMI,
4499  unsigned DefIdx) const {
4500  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4501  if (!ItinData || ItinData->isEmpty())
4502  return false;
4503 
4504  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4505  if (DDomain == ARMII::DomainGeneral) {
4506  unsigned DefClass = DefMI.getDesc().getSchedClass();
4507  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4508  return (DefCycle != -1 && DefCycle <= 2);
4509  }
4510  return false;
4511 }
4512 
4513 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4514  StringRef &ErrInfo) const {
4515  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4516  ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4517  return false;
4518  }
4519  return true;
4520 }
4521 
4522 // LoadStackGuard has so far only been implemented for MachO. Different code
4523 // sequence is needed for other targets.
4524 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4525  unsigned LoadImmOpc,
4526  unsigned LoadOpc) const {
4527  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4528  "ROPI/RWPI not currently supported with stack guard");
4529 
4530  MachineBasicBlock &MBB = *MI->getParent();
4531  DebugLoc DL = MI->getDebugLoc();
4532  unsigned Reg = MI->getOperand(0).getReg();
4533  const GlobalValue *GV =
4534  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4535  MachineInstrBuilder MIB;
4536 
4537  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4538  .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
4539 
4540  if (Subtarget.isGVIndirectSymbol(GV)) {
4541  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4542  MIB.addReg(Reg, RegState::Kill).addImm(0);
4543  auto Flags = MachineMemOperand::MOLoad |
4544  MachineMemOperand::MODereferenceable |
4545  MachineMemOperand::MOInvariant;
4546  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4547  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
4548  MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4549  }
4550 
4551  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4552  MIB.addReg(Reg, RegState::Kill)
4553  .addImm(0)
4554  .cloneMemRefs(*MI)
4555  .add(predOps(ARMCC::AL));
4556 }
4557 
4558 bool
4559 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4560  unsigned &AddSubOpc,
4561  bool &NegAcc, bool &HasLane) const {
4562  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4563  if (I == MLxEntryMap.end())
4564  return false;
4565 
4566  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4567  MulOpc = Entry.MulOpc;
4568  AddSubOpc = Entry.AddSubOpc;
4569  NegAcc = Entry.NegAcc;
4570  HasLane = Entry.HasLane;
4571  return true;
4572 }
4573 
4574 //===----------------------------------------------------------------------===//
4575 // Execution domains.
4576 //===----------------------------------------------------------------------===//
4577 //
4578 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4579 // and some can go down both. The vmov instructions go down the VFP pipeline,
4580 // but they can be changed to vorr equivalents that are executed by the NEON
4581 // pipeline.
4582 //
4583 // We use the following execution domain numbering:
4584 //
4585 enum {
4586  ExeGeneric = 0,
4587  ExeVFP = 1,
4588  ExeNEON = 2
4589 };
4590 
4591 //
4592 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4593 //
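// For example, an unpredicated VMOVD is reported below as currently executing
// in ExeVFP with a possible-domains mask of (1 << ExeVFP) | (1 << ExeNEON),
// which tells the execution-domain fixup pass it may rewrite the copy as a
// VORRd on the NEON pipeline; a plain VFP arithmetic instruction comes back
// as (ExeVFP, 0), i.e. not swizzlable.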
4594 std::pair<uint16_t, uint16_t>
4595 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4596  // If we don't have access to NEON instructions then we won't be able
4597  // to swizzle anything to the NEON domain. Check to make sure.
4598  if (Subtarget.hasNEON()) {
4599  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4600  // if they are not predicated.
4601  if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4602  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4603 
4604  // CortexA9 is particularly picky about mixing the two and wants these
4605  // converted.
4606  if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4607  (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4608  MI.getOpcode() == ARM::VMOVS))
4609  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4610  }
4611  // No other instructions can be swizzled, so just determine their domain.
4612  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4613 
4614  if (Domain & ARMII::DomainNEON)
4615  return std::make_pair(ExeNEON, 0);
4616 
4617  // Certain instructions can go either way on Cortex-A8.
4618  // Treat them as NEON instructions.
4619  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4620  return std::make_pair(ExeNEON, 0);
4621 
4622  if (Domain & ARMII::DomainVFP)
4623  return std::make_pair(ExeVFP, 0);
4624 
4625  return std::make_pair(ExeGeneric, 0);
4626 }
4627 
4628 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4629  unsigned SReg, unsigned &Lane) {
4630  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4631  Lane = 0;
4632 
4633  if (DReg != ARM::NoRegister)
4634  return DReg;
4635 
4636  Lane = 1;
4637  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4638 
4639  assert(DReg && "S-register with no D super-register?");
4640  return DReg;
4641 }
4642 
4643 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4644 /// set ImplicitSReg to a register number that must be marked as implicit-use or
4645 /// zero if no register needs to be defined as implicit-use.
4646 ///
4647 /// If the function cannot determine if an SPR should be marked implicit use or
4648 /// not, it returns false.
4649 ///
4650 /// This function handles cases where an instruction is being modified from taking
4651 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4652 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4653 /// lane of the DPR).
4654 ///
4655 /// If the other SPR is defined, an implicit-use of it should be added. Else,
4656 /// (including the case where the DPR itself is defined), it should not.
4657 ///
4658 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4659  MachineInstr &MI, unsigned DReg,
4660  unsigned Lane, unsigned &ImplicitSReg) {
4661  // If the DPR is defined or used already, the other SPR lane will be chained
4662  // correctly, so there is nothing to be done.
4663  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4664  ImplicitSReg = 0;
4665  return true;
4666  }
4667 
4668  // Otherwise we need to go searching to see if the SPR is set explicitly.
4669  ImplicitSReg = TRI->getSubReg(DReg,
4670  (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4671  MachineBasicBlock::LivenessQueryResult LQR =
4672  MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4673 
4674  if (LQR == MachineBasicBlock::LQR_Live)
4675  return true;
4676  else if (LQR == MachineBasicBlock::LQR_Unknown)
4677  return false;
4678 
4679  // If the register is known not to be live, there is no need to add an
4680  // implicit-use.
4681  ImplicitSReg = 0;
4682  return true;
4683 }
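// As an example of the helper above: when a VMOVSR writing s1 is rewritten as
// a VSETLNi32 on d0 lane 1, the other lane corresponds to s0; if s0 is live
// at that point an implicit use of it is added so its earlier definition
// stays chained, and if the liveness query is inconclusive the domain change
// is abandoned.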
4684 
4685 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4686  unsigned Domain) const {
4687  unsigned DstReg, SrcReg, DReg;
4688  unsigned Lane;
4689  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4690  const TargetRegisterInfo *TRI = &getRegisterInfo();
4691  switch (MI.getOpcode()) {
4692  default:
4693  llvm_unreachable("cannot handle opcode!");
4694  break;
4695  case ARM::VMOVD:
4696  if (Domain != ExeNEON)
4697  break;
4698 
4699  // Zap the predicate operands.
4700  assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4701 
4702  // Make sure we've got NEON instructions.
4703  assert(Subtarget.hasNEON() && "VORRd requires NEON");
4704 
4705  // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4706  DstReg = MI.getOperand(0).getReg();
4707  SrcReg = MI.getOperand(1).getReg();
4708 
4709  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4710  MI.RemoveOperand(i - 1);
4711 
4712  // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4713  MI.setDesc(get(ARM::VORRd));
4714  MIB.addReg(DstReg, RegState::Define)
4715  .addReg(SrcReg)
4716  .addReg(SrcReg)
4717  .add(predOps(ARMCC::AL));
4718  break;
4719  case ARM::VMOVRS:
4720  if (Domain != ExeNEON)
4721  break;
4722  assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
4723 
4724  // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
4725  DstReg = MI.getOperand(0).getReg();
4726  SrcReg = MI.getOperand(1).getReg();
4727 
4728  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4729  MI.RemoveOperand(i - 1);
4730 
4731  DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
4732 
4733  // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
4734  // Note that DSrc has been widened and the other lane may be undef, which
4735  // contaminates the entire register.
4736  MI.setDesc(get(ARM::VGETLNi32));
4737  MIB.addReg(DstReg, RegState::Define)
4738  .addReg(DReg, RegState::Undef)
4739  .addImm(Lane)
4740  .add(predOps(ARMCC::AL));
4741 
4742  // The old source should be an implicit use, otherwise we might think it
4743  // was dead before here.
4744  MIB.addReg(SrcReg, RegState::Implicit);
4745  break;
4746  case ARM::VMOVSR: {
4747  if (Domain != ExeNEON)
4748  break;
4749  assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
4750 
4751  // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
4752  DstReg = MI.getOperand(0).getReg();
4753  SrcReg = MI.getOperand(1).getReg();
4754 
4755  DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
4756 
4757  unsigned ImplicitSReg;
4758  if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
4759  break;
4760 
4761  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4762  MI.RemoveOperand(i - 1);
4763 
4764  // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
4765  // Again DDst may be undefined at the beginning of this instruction.
4766  MI.setDesc(get(ARM::VSETLNi32));
4767  MIB.addReg(DReg, RegState::Define)
4768  .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
4769  .addReg(SrcReg)
4770  .addImm(Lane)
4771  .add(predOps(ARMCC::AL));
4772 
4773  // The narrower destination must be marked as set to keep previous chains
4774  // in place.
4775  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4776  if (ImplicitSReg != 0)
4777  MIB.addReg(ImplicitSReg, RegState::Implicit);
4778  break;
4779  }
4780  case ARM::VMOVS: {
4781  if (Domain != ExeNEON)
4782  break;
4783 
4784  // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4785  DstReg = MI.getOperand(0).getReg();
4786  SrcReg = MI.getOperand(1).getReg();
4787 
4788  unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4789  DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4790  DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4791 
4792  unsigned ImplicitSReg;
4793  if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4794  break;
4795 
4796  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4797  MI.RemoveOperand(i - 1);
4798 
4799  if (DSrc == DDst) {
4800  // Destination can be:
4801  // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4802  MI.setDesc(get(ARM::VDUPLN32d));
4803  MIB.addReg(DDst, RegState::Define)
4804  .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
4805  .addImm(SrcLane)
4806  .add(predOps(ARMCC::AL));
4807 
4808  // Neither the source nor the destination is naturally represented any
4809  // more, so add them in manually.
4810  MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4811  MIB.addReg(SrcReg, RegState::Implicit);
4812  if (ImplicitSReg != 0)
4813  MIB.addReg(ImplicitSReg, RegState::Implicit);
4814  break;
4815  }
4816 
4817  // In general there's no single instruction that can perform an S <-> S
4818  // move in NEON space, but a pair of VEXT instructions *can* do the
4819  // job. It turns out that the VEXTs needed will only use DSrc once, with
4820  // the position based purely on the combination of lane-0 and lane-1
4821  // involved. For example
4822  // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
4823  // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
4824  // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
4825  // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
4826  //
4827  // Pattern of the MachineInstrs is:
4828  // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4829  MachineInstrBuilder NewMIB;
4830  NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
4831  DDst);
4832 
4833  // On the first instruction, both DSrc and DDst may be undef if present.
4834  // Specifically when the original instruction didn't have them as an
4835  // <imp-use>.
4836  unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
4837  bool CurUndef = !MI.readsRegister(CurReg, TRI);
4838  NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4839 
4840  CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
4841  CurUndef = !MI.readsRegister(CurReg, TRI);
4842  NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
4843  .addImm(1)
4844  .add(predOps(ARMCC::AL));
4845 
4846  if (SrcLane == DstLane)
4847  NewMIB.addReg(SrcReg, RegState::Implicit);
4848 
4849  MI.setDesc(get(ARM::VEXTd32));
4850  MIB.addReg(DDst, RegState::Define);
4851 
4852  // On the second instruction, DDst has definitely been defined above, so
4853  // it is not undef. DSrc, if present, can be undef as above.
4854  CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
4855  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4856  MIB.addReg(CurReg, getUndefRegState(CurUndef));
4857 
4858  CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
4859  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4860  MIB.addReg(CurReg, getUndefRegState(CurUndef))
4861  .addImm(1)
4862  .add(predOps(ARMCC::AL));
4863 
4864  if (SrcLane != DstLane)
4865  MIB.addReg(SrcReg, RegState::Implicit);
4866 
4867  // As before, the original destination is no longer represented, add it
4868  // implicitly.
4869  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4870  if (ImplicitSReg != 0)
4871  MIB.addReg(ImplicitSReg, RegState::Implicit);
4872  break;
4873  }
4874  }
4875 }
4876 
4877 //===----------------------------------------------------------------------===//
4878 // Partial register updates
4879 //===----------------------------------------------------------------------===//
4880 //
4881 // Swift renames NEON registers with 64-bit granularity. That means any
4882 // instruction writing an S-reg implicitly reads the containing D-reg. The
4883 // problem is mostly avoided by translating f32 operations to v2f32 operations
4884 // on D-registers, but f32 loads are still a problem.
4885 //
4886 // These instructions can load an f32 into a NEON register:
4887 //
4888 // VLDRS - Only writes S, partial D update.
4889 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4890 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4891 //
4892 // FCONSTD can be used as a dependency-breaking instruction.
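// Worked scenario for the two hooks below: on Swift, a VLDRS into s2 only
// partially writes d1, so a later NEON read of d1 would stall on the load.
// getPartialRegUpdateClearance reports the subtarget's clearance distance,
// and when the pass finds no def of d1 within that distance,
// breakPartialRegDependency inserts "vmov.f64 d1, #0.5" (FCONSTD, encoding
// 96) right before the load to sever the dependency.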
4893 unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
4894  const MachineInstr &MI, unsigned OpNum,
4895  const TargetRegisterInfo *TRI) const {
4896  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
4897  if (!PartialUpdateClearance)
4898  return 0;
4899 
4900  assert(TRI && "Need TRI instance");
4901 
4902  const MachineOperand &MO = MI.getOperand(OpNum);
4903  if (MO.readsReg())
4904  return 0;
4905  unsigned Reg = MO.getReg();
4906  int UseOp = -1;
4907 
4908  switch (MI.getOpcode()) {
4909  // Normal instructions writing only an S-register.
4910  case ARM::VLDRS:
4911  case ARM::FCONSTS:
4912  case ARM::VMOVSR:
4913  case ARM::VMOVv8i8:
4914  case ARM::VMOVv4i16:
4915  case ARM::VMOVv2i32:
4916  case ARM::VMOVv2f32:
4917  case ARM::VMOVv1i64:
4918  UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
4919  break;
4920 
4921  // Explicitly reads the dependency.
4922  case ARM::VLD1LNd32:
4923  UseOp = 3;
4924  break;
4925  default:
4926  return 0;
4927  }
4928 
4929  // If this instruction actually reads a value from Reg, there is no unwanted
4930  // dependency.
4931  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
4932  return 0;
4933 
4934  // We must be able to clobber the whole D-reg.
4935  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4936  // Virtual register must be a def undef foo:ssub_0 operand.
4937  if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
4938  return 0;
4939  } else if (ARM::SPRRegClass.contains(Reg)) {
4940  // Physical register: MI must define the full D-reg.
4941  unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4942  &ARM::DPRRegClass);
4943  if (!DReg || !MI.definesRegister(DReg, TRI))
4944  return 0;
4945  }
4946 
4947  // MI has an unwanted D-register dependency.
4948  // Avoid defs in the previous N instructions.
4949  return PartialUpdateClearance;
4950 }
4951 
4952 // Break a partial register dependency after getPartialRegUpdateClearance
4953 // returned non-zero.
4954 void ARMBaseInstrInfo::breakPartialRegDependency(
4955  MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
4956  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
4957  assert(TRI && "Need TRI instance");
4958 
4959  const MachineOperand &MO = MI.getOperand(OpNum);
4960  unsigned Reg = MO.getReg();
4962  "Can't break virtual register dependencies.");
4963  unsigned DReg = Reg;
4964 
4965  // If MI defines an S-reg, find the corresponding D super-register.
4966  if (ARM::SPRRegClass.contains(Reg)) {
4967  DReg = ARM::D0 + (Reg - ARM::S0) / 2;
4968  assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
4969  }
4970 
4971  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
4972  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
4973 
4974  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
4975  // the full D-register by loading the same value to both lanes. The
4976  // instruction is micro-coded with 2 uops, so don't do this until we can
4977  // properly schedule micro-coded instructions. The dispatcher stalls cause
4978  // too big regressions.
4979 
4980  // Insert the dependency-breaking FCONSTD before MI.
4981  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
4982  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
4983  .addImm(96)
4984  .add(predOps(ARMCC::AL));
4985  MI.addRegisterKilled(DReg, TRI, true);
4986 }
4987 
4988 bool ARMBaseInstrInfo::hasNOP() const {
4989  return Subtarget.getFeatureBits()[ARM::HasV6KOps];
4990 }
4991 
4992 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
4993  if (MI->getNumOperands() < 4)
4994  return true;
4995  unsigned ShOpVal = MI->getOperand(3).getImm();
4996  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
4997  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
4998  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
4999  ((ShImm == 1 || ShImm == 2) &&
5000  ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5001  return true;
5002 
5003  return false;
5004 }
5005 
5006 bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5007  const MachineInstr &MI, unsigned DefIdx,
5008  SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5009  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5010  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5011 
5012  switch (MI.getOpcode()) {
5013  case ARM::VMOVDRR:
5014  // dX = VMOVDRR rY, rZ
5015  // is the same as:
5016  // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5017  // Populate the InputRegs accordingly.
5018  // rY
5019  const MachineOperand *MOReg = &MI.getOperand(1);
5020  if (!MOReg->isUndef())
5021  InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5022  MOReg->getSubReg(), ARM::ssub_0));
5023  // rZ
5024  MOReg = &MI.getOperand(2);
5025  if (!MOReg->isUndef())
5026  InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5027  MOReg->getSubReg(), ARM::ssub_1));
5028  return true;
5029  }
5030  llvm_unreachable("Target dependent opcode missing");
5031 }
5032 
5033 bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5034  const MachineInstr &MI, unsigned DefIdx,
5035  RegSubRegPairAndIdx &InputReg) const {
5036  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5037  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5038 
5039  switch (MI.getOpcode()) {
5040  case ARM::VMOVRRD:
5041  // rX, rY = VMOVRRD dZ
5042  // is the same as:
5043  // rX = EXTRACT_SUBREG dZ, ssub_0
5044  // rY = EXTRACT_SUBREG dZ, ssub_1
5045  const MachineOperand &MOReg = MI.getOperand(2);
5046  if (MOReg.isUndef())
5047  return false;
5048  InputReg.Reg = MOReg.getReg();
5049  InputReg.SubReg = MOReg.getSubReg();
5050  InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5051  return true;
5052  }
5053  llvm_unreachable("Target dependent opcode missing");
5054 }
5055 
5056 bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5057  const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5058  RegSubRegPairAndIdx &InsertedReg) const {
5059  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5060  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5061 
5062  switch (MI.getOpcode()) {
5063  case ARM::VSETLNi32:
5064  // dX = VSETLNi32 dY, rZ, imm
5065  const MachineOperand &MOBaseReg = MI.getOperand(1);
5066  const MachineOperand &MOInsertedReg = MI.getOperand(2);
5067  if (MOInsertedReg.isUndef())
5068  return false;
5069  const MachineOperand &MOIndex = MI.getOperand(3);
5070  BaseReg.Reg = MOBaseReg.getReg();
5071  BaseReg.SubReg = MOBaseReg.getSubReg();
5072 
5073  InsertedReg.Reg = MOInsertedReg.getReg();
5074  InsertedReg.SubReg = MOInsertedReg.getSubReg();
5075  InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
5076  return true;
5077  }
5078  llvm_unreachable("Target dependent opcode missing");
5079 }
5080 
5081 std::pair<unsigned, unsigned>
5082 ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5083  const unsigned Mask = ARMII::MO_OPTION_MASK;
5084  return std::make_pair(TF & Mask, TF & ~Mask);
5085 }
5086 
5087 ArrayRef<std::pair<unsigned, const char *>>
5088 ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5089  using namespace ARMII;
5090 
5091  static const std::pair<unsigned, const char *> TargetFlags[] = {
5092  {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
5093  return makeArrayRef(TargetFlags);
5094 }
5095 
5096 ArrayRef<std::pair<unsigned, const char *>>
5097 ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5098  using namespace ARMII;
5099 
5100  static const std::pair<unsigned, const char *> TargetFlags[] = {
5101  {MO_COFFSTUB, "arm-coffstub"},
5102  {MO_GOT, "arm-got"},
5103  {MO_SBREL, "arm-sbrel"},
5104  {MO_DLLIMPORT, "arm-dllimport"},
5105  {MO_SECREL, "arm-secrel"},
5106  {MO_NONLAZY, "arm-nonlazy"}};
5107  return makeArrayRef(TargetFlags);
5108 }
bool isRegSequence() const
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:320
bool isSOImmTwoPartVal(unsigned V)
isSOImmTwoPartVal - Return true if the specified value can be obtained by or&#39;ing together two SOImmVa...
VarInfo - This represents the regions where a virtual register is live in the program.
Definition: LiveVariables.h:78
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
void setIsDead(bool Val=true)
unsigned getPartialRegUpdateClearance(const MachineInstr &, unsigned, const TargetRegisterInfo *) const override
setjmp/longjmp based exceptions
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
bool mayLoad() const
Return true if this instruction could possibly read memory.
Definition: MCInstrDesc.h:398
unsigned getSOImmTwoPartSecond(unsigned V)
getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, return the second chunk of ...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:458
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
static bool isThumb(const MCSubtargetInfo &STI)
unsigned getSpillSize(const TargetRegisterClass &RC) const
Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1)
return AArch64::GPR64RegClass contains(Reg)
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool removeKill(MachineInstr &MI)
removeKill - Delete a kill corresponding to the specified machine instruction.
Definition: LiveVariables.h:93
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
void clearKillInfo()
Clears kill flags on all operands.
static bool isCPSRDefined(const MachineInstr &MI)
Definition: