1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the Base ARM implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMBaseInstrInfo.h"
14 #include "ARMBaseRegisterInfo.h"
15 #include "ARMConstantPoolValue.h"
16 #include "ARMFeatures.h"
17 #include "ARMHazardRecognizer.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Triple.h"
42 #include "llvm/IR/Attributes.h"
43 #include "llvm/IR/Constants.h"
44 #include "llvm/IR/DebugLoc.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/GlobalValue.h"
47 #include "llvm/MC/MCAsmInfo.h"
48 #include "llvm/MC/MCInstrDesc.h"
51 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Compiler.h"
54 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <new>
63 #include <utility>
64 #include <vector>
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "arm-instrinfo"
69 
70 #define GET_INSTRINFO_CTOR_DTOR
71 #include "ARMGenInstrInfo.inc"
72 
73 static cl::opt<bool>
74 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
75  cl::desc("Enable ARM 2-addr to 3-addr conv"));
76 
77 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
78 struct ARM_MLxEntry {
79  uint16_t MLxOpc; // MLA / MLS opcode
80  uint16_t MulOpc; // Expanded multiplication opcode
81  uint16_t AddSubOpc; // Expanded add / sub opcode
82  bool NegAcc; // True if the acc is negated before the add / sub.
83  bool HasLane; // True if instruction has an extra "lane" operand.
84 };
85 
86 static const ARM_MLxEntry ARM_MLxTable[] = {
87  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
88  // fp scalar ops
89  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
90  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
91  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
92  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
93  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
94  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
95  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
96  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
97 
98  // fp SIMD ops
99  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
100  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
101  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
102  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
103  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
104  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
105  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
106  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
107 };
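// Illustrative sketch (not part of the original source; register names are
// hypothetical): using the first table entry, a fused multiply-accumulate
//   %d = VMLAS %acc, %a, %b      ; d = acc + a * b
// can be expanded by the backend (e.g. the MLx expansion pass) into
//   %t = VMULS %a, %b
//   %d = VADDS %acc, %t
// with the NegAcc / HasLane fields of the entry steering the exact expansion.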
108 
 109 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
 110  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
111  Subtarget(STI) {
112  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
113  if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
114  llvm_unreachable("Duplicated entries?");
115  MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
116  MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
117  }
118 }
119 
120 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
121 // currently defaults to no prepass hazard recognizer.
 122 ScheduleHazardRecognizer *
 123 ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
 124  const ScheduleDAG *DAG) const {
125  if (usePreRAHazardRecognizer()) {
126  const InstrItineraryData *II =
127  static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
128  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
129  }
 130  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
 131 }
132 
 133 ScheduleHazardRecognizer *ARMBaseInstrInfo::
 134 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
 135  const ScheduleDAG *DAG) const {
136  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
137  return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
 138  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
 139 }
140 
 141 MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
 142  MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
 143  // FIXME: Thumb2 support.
144 
145  if (!EnableARM3Addr)
146  return nullptr;
147 
148  MachineFunction &MF = *MI.getParent()->getParent();
149  uint64_t TSFlags = MI.getDesc().TSFlags;
150  bool isPre = false;
151  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
152  default: return nullptr;
153  case ARMII::IndexModePre:
154  isPre = true;
155  break;
 156  case ARMII::IndexModePost:
 157  break;
158  }
159 
160  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
161  // operation.
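 // Illustrative example (not from the original source): a pre-indexed load
 //   LDR r0, [r1, #4]!
 // can be rewritten as the un-indexed pair
 //   ADD r1, r1, #4
 //   LDR r0, [r1]
 // where the separate ADD now produces the writeback value.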
162  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
163  if (MemOpc == 0)
164  return nullptr;
165 
166  MachineInstr *UpdateMI = nullptr;
167  MachineInstr *MemMI = nullptr;
168  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
169  const MCInstrDesc &MCID = MI.getDesc();
170  unsigned NumOps = MCID.getNumOperands();
171  bool isLoad = !MI.mayStore();
172  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
173  const MachineOperand &Base = MI.getOperand(2);
174  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
175  unsigned WBReg = WB.getReg();
176  unsigned BaseReg = Base.getReg();
177  unsigned OffReg = Offset.getReg();
178  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
179  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
180  switch (AddrMode) {
181  default: llvm_unreachable("Unknown indexed op!");
182  case ARMII::AddrMode2: {
183  bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
184  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
185  if (OffReg == 0) {
186  if (ARM_AM::getSOImmVal(Amt) == -1)
187  // Can't encode it in a so_imm operand. This transformation will
188  // add more than 1 instruction. Abandon!
189  return nullptr;
190  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
191  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
192  .addReg(BaseReg)
193  .addImm(Amt)
194  .add(predOps(Pred))
195  .add(condCodeOp());
196  } else if (Amt != 0) {
 197  ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
 198  unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
199  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
200  get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
201  .addReg(BaseReg)
202  .addReg(OffReg)
203  .addReg(0)
204  .addImm(SOOpc)
205  .add(predOps(Pred))
206  .add(condCodeOp());
207  } else
208  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
209  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
210  .addReg(BaseReg)
211  .addReg(OffReg)
212  .add(predOps(Pred))
213  .add(condCodeOp());
214  break;
215  }
216  case ARMII::AddrMode3 : {
217  bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
218  unsigned Amt = ARM_AM::getAM3Offset(OffImm);
219  if (OffReg == 0)
220  // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
221  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
222  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
223  .addReg(BaseReg)
224  .addImm(Amt)
225  .add(predOps(Pred))
226  .add(condCodeOp());
227  else
228  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
229  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
230  .addReg(BaseReg)
231  .addReg(OffReg)
232  .add(predOps(Pred))
233  .add(condCodeOp());
234  break;
235  }
236  }
237 
238  std::vector<MachineInstr*> NewMIs;
239  if (isPre) {
240  if (isLoad)
241  MemMI =
242  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
243  .addReg(WBReg)
244  .addImm(0)
245  .addImm(Pred);
246  else
247  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
248  .addReg(MI.getOperand(1).getReg())
249  .addReg(WBReg)
250  .addReg(0)
251  .addImm(0)
252  .addImm(Pred);
253  NewMIs.push_back(MemMI);
254  NewMIs.push_back(UpdateMI);
255  } else {
256  if (isLoad)
257  MemMI =
258  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
259  .addReg(BaseReg)
260  .addImm(0)
261  .addImm(Pred);
262  else
263  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
264  .addReg(MI.getOperand(1).getReg())
265  .addReg(BaseReg)
266  .addReg(0)
267  .addImm(0)
268  .addImm(Pred);
269  if (WB.isDead())
270  UpdateMI->getOperand(0).setIsDead();
271  NewMIs.push_back(UpdateMI);
272  NewMIs.push_back(MemMI);
273  }
274 
275  // Transfer LiveVariables states, kill / dead info.
276  if (LV) {
277  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
 278  MachineOperand &MO = MI.getOperand(i);
 279  if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
 280  unsigned Reg = MO.getReg();
 281 
 282  LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
 283  if (MO.isDef()) {
284  MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
285  if (MO.isDead())
286  LV->addVirtualRegisterDead(Reg, *NewMI);
287  }
288  if (MO.isUse() && MO.isKill()) {
289  for (unsigned j = 0; j < 2; ++j) {
290  // Look at the two new MI's in reverse order.
291  MachineInstr *NewMI = NewMIs[j];
292  if (!NewMI->readsRegister(Reg))
293  continue;
294  LV->addVirtualRegisterKilled(Reg, *NewMI);
295  if (VI.removeKill(MI))
296  VI.Kills.push_back(NewMI);
297  break;
298  }
299  }
300  }
301  }
302  }
303 
 304  MachineBasicBlock::iterator MBBI = MI.getIterator();
 305  MFI->insert(MBBI, NewMIs[1]);
306  MFI->insert(MBBI, NewMIs[0]);
307  return NewMIs[0];
308 }
309 
 310 // Branch analysis.
 311 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 312  MachineBasicBlock *&TBB,
 313  MachineBasicBlock *&FBB,
 314  SmallVectorImpl<MachineOperand> &Cond,
 315  bool AllowModify) const {
316  TBB = nullptr;
317  FBB = nullptr;
318 
 319  MachineBasicBlock::iterator I = MBB.end();
 320  if (I == MBB.begin())
321  return false; // Empty blocks are easy.
322  --I;
323 
324  // Walk backwards from the end of the basic block until the branch is
325  // analyzed or we give up.
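 // Illustrative outcomes (not part of the original source): for a block
 // ending in "Bcc %bb.1, CC, %cpsr ; B %bb.2" this sets TBB = %bb.1,
 // FBB = %bb.2 and Cond = {CC, %cpsr}; for a block ending in a lone
 // unconditional "B %bb.1" it sets TBB = %bb.1 and leaves Cond empty.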
326  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
327  // Flag to be raised on unanalyzeable instructions. This is useful in cases
328  // where we want to clean up on the end of the basic block before we bail
329  // out.
330  bool CantAnalyze = false;
331 
332  // Skip over DEBUG values and predicated nonterminators.
333  while (I->isDebugInstr() || !I->isTerminator()) {
334  if (I == MBB.begin())
335  return false;
336  --I;
337  }
338 
339  if (isIndirectBranchOpcode(I->getOpcode()) ||
340  isJumpTableBranchOpcode(I->getOpcode())) {
341  // Indirect branches and jump tables can't be analyzed, but we still want
342  // to clean up any instructions at the tail of the basic block.
343  CantAnalyze = true;
344  } else if (isUncondBranchOpcode(I->getOpcode())) {
345  TBB = I->getOperand(0).getMBB();
346  } else if (isCondBranchOpcode(I->getOpcode())) {
347  // Bail out if we encounter multiple conditional branches.
348  if (!Cond.empty())
349  return true;
350 
351  assert(!FBB && "FBB should have been null.");
352  FBB = TBB;
353  TBB = I->getOperand(0).getMBB();
354  Cond.push_back(I->getOperand(1));
355  Cond.push_back(I->getOperand(2));
356  } else if (I->isReturn()) {
357  // Returns can't be analyzed, but we should run cleanup.
358  CantAnalyze = !isPredicated(*I);
359  } else {
 360  // We encountered an unrecognized terminator. Bail out immediately.
361  return true;
362  }
363 
364  // Cleanup code - to be run for unpredicated unconditional branches and
365  // returns.
366  if (!isPredicated(*I) &&
367  (isUncondBranchOpcode(I->getOpcode()) ||
368  isIndirectBranchOpcode(I->getOpcode()) ||
369  isJumpTableBranchOpcode(I->getOpcode()) ||
370  I->isReturn())) {
 371  // Forget any previous conditional branch information - it no longer applies.
372  Cond.clear();
373  FBB = nullptr;
374 
375  // If we can modify the function, delete everything below this
376  // unconditional branch.
377  if (AllowModify) {
378  MachineBasicBlock::iterator DI = std::next(I);
379  while (DI != MBB.end()) {
380  MachineInstr &InstToDelete = *DI;
381  ++DI;
382  InstToDelete.eraseFromParent();
383  }
384  }
385  }
386 
387  if (CantAnalyze)
388  return true;
389 
390  if (I == MBB.begin())
391  return false;
392 
393  --I;
394  }
395 
396  // We made it past the terminators without bailing out - we must have
397  // analyzed this branch successfully.
398  return false;
399 }
400 
 401 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
 402  int *BytesRemoved) const {
403  assert(!BytesRemoved && "code size not handled");
404 
 405  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
 406  if (I == MBB.end())
407  return 0;
408 
409  if (!isUncondBranchOpcode(I->getOpcode()) &&
410  !isCondBranchOpcode(I->getOpcode()))
411  return 0;
412 
413  // Remove the branch.
414  I->eraseFromParent();
415 
416  I = MBB.end();
417 
418  if (I == MBB.begin()) return 1;
419  --I;
420  if (!isCondBranchOpcode(I->getOpcode()))
421  return 1;
422 
423  // Remove the branch.
424  I->eraseFromParent();
425  return 2;
426 }
427 
 428 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
 429  MachineBasicBlock *TBB,
 430  MachineBasicBlock *FBB,
 431  ArrayRef<MachineOperand> Cond,
 432  const DebugLoc &DL,
433  int *BytesAdded) const {
434  assert(!BytesAdded && "code size not handled");
 435  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
 436  int BOpc = !AFI->isThumbFunction()
437  ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
438  int BccOpc = !AFI->isThumbFunction()
439  ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
440  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
441 
442  // Shouldn't be a fall through.
443  assert(TBB && "insertBranch must not be told to insert a fallthrough");
444  assert((Cond.size() == 2 || Cond.size() == 0) &&
445  "ARM branch conditions have two components!");
446 
447  // For conditional branches, we use addOperand to preserve CPSR flags.
448 
449  if (!FBB) {
450  if (Cond.empty()) { // Unconditional branch?
451  if (isThumb)
452  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
453  else
454  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
455  } else
456  BuildMI(&MBB, DL, get(BccOpc))
457  .addMBB(TBB)
458  .addImm(Cond[0].getImm())
459  .add(Cond[1]);
460  return 1;
461  }
462 
463  // Two-way conditional branch.
464  BuildMI(&MBB, DL, get(BccOpc))
465  .addMBB(TBB)
466  .addImm(Cond[0].getImm())
467  .add(Cond[1]);
468  if (isThumb)
469  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
470  else
471  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
472  return 2;
473 }
474 
 475 bool ARMBaseInstrInfo::
 476 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
 477  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
478  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
479  return false;
480 }
481 
 482 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
 483  if (MI.isBundle()) {
 484  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
 485  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
 486  while (++I != E && I->isInsideBundle()) {
487  int PIdx = I->findFirstPredOperandIdx();
488  if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
489  return true;
490  }
491  return false;
492  }
493 
494  int PIdx = MI.findFirstPredOperandIdx();
495  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
496 }
497 
 498 bool ARMBaseInstrInfo::PredicateInstruction(
 499  MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
 500  unsigned Opc = MI.getOpcode();
501  if (isUncondBranchOpcode(Opc)) {
502  MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
 503  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
 504  .addImm(Pred[0].getImm())
505  .addReg(Pred[1].getReg());
506  return true;
507  }
508 
509  int PIdx = MI.findFirstPredOperandIdx();
510  if (PIdx != -1) {
511  MachineOperand &PMO = MI.getOperand(PIdx);
512  PMO.setImm(Pred[0].getImm());
513  MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
514  return true;
515  }
516  return false;
517 }
518 
 519 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
 520  ArrayRef<MachineOperand> Pred2) const {
521  if (Pred1.size() > 2 || Pred2.size() > 2)
522  return false;
523 
524  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
525  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
526  if (CC1 == CC2)
527  return true;
528 
529  switch (CC1) {
530  default:
531  return false;
532  case ARMCC::AL:
533  return true;
534  case ARMCC::HS:
535  return CC2 == ARMCC::HI;
536  case ARMCC::LS:
537  return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
538  case ARMCC::GE:
539  return CC2 == ARMCC::GT;
540  case ARMCC::LE:
541  return CC2 == ARMCC::LT;
542  }
543 }
544 
 545 bool ARMBaseInstrInfo::DefinesPredicate(
 546  MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
547  bool Found = false;
548  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
549  const MachineOperand &MO = MI.getOperand(i);
550  if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
551  (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
552  Pred.push_back(MO);
553  Found = true;
554  }
555  }
556 
557  return Found;
558 }
559 
 560 bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
 561  for (const auto &MO : MI.operands())
562  if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
563  return true;
564  return false;
565 }
566 
568  unsigned Op) const {
569  const MachineOperand &Offset = MI.getOperand(Op + 1);
570  return Offset.getReg() != 0;
571 }
572 
573 // Load with negative register offset requires additional 1cyc and +I unit
574 // for Cortex A57
576  unsigned Op) const {
577  const MachineOperand &Offset = MI.getOperand(Op + 1);
578  const MachineOperand &Opc = MI.getOperand(Op + 2);
579  assert(Opc.isImm());
580  assert(Offset.isReg());
581  int64_t OpcImm = Opc.getImm();
582 
583  bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
584  return (isSub && Offset.getReg() != 0);
585 }
586 
588  unsigned Op) const {
589  const MachineOperand &Opc = MI.getOperand(Op + 2);
590  unsigned OffImm = Opc.getImm();
591  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
592 }
593 
594 // Load, scaled register offset, not plus LSL2
596  unsigned Op) const {
597  const MachineOperand &Opc = MI.getOperand(Op + 2);
598  unsigned OffImm = Opc.getImm();
599 
600  bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
601  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
603  if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
604  bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
605  return !SimpleScaled;
606 }
607 
608 // Minus reg for ldstso addr mode
610  unsigned Op) const {
611  unsigned OffImm = MI.getOperand(Op + 2).getImm();
612  return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
613 }
614 
615 // Load, scaled register offset
617  unsigned Op) const {
618  unsigned OffImm = MI.getOperand(Op + 2).getImm();
619  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
620 }
621 
622 static bool isEligibleForITBlock(const MachineInstr *MI) {
623  switch (MI->getOpcode()) {
624  default: return true;
625  case ARM::tADC: // ADC (register) T1
626  case ARM::tADDi3: // ADD (immediate) T1
627  case ARM::tADDi8: // ADD (immediate) T2
628  case ARM::tADDrr: // ADD (register) T1
629  case ARM::tAND: // AND (register) T1
630  case ARM::tASRri: // ASR (immediate) T1
631  case ARM::tASRrr: // ASR (register) T1
632  case ARM::tBIC: // BIC (register) T1
633  case ARM::tEOR: // EOR (register) T1
634  case ARM::tLSLri: // LSL (immediate) T1
635  case ARM::tLSLrr: // LSL (register) T1
636  case ARM::tLSRri: // LSR (immediate) T1
637  case ARM::tLSRrr: // LSR (register) T1
638  case ARM::tMUL: // MUL T1
639  case ARM::tMVN: // MVN (register) T1
640  case ARM::tORR: // ORR (register) T1
641  case ARM::tROR: // ROR (register) T1
642  case ARM::tRSB: // RSB (immediate) T1
643  case ARM::tSBC: // SBC (register) T1
644  case ARM::tSUBi3: // SUB (immediate) T1
645  case ARM::tSUBi8: // SUB (immediate) T2
646  case ARM::tSUBrr: // SUB (register) T1
647  return !ARMBaseInstrInfo::isCPSRDefined(*MI);
648  }
649 }
650 
651 /// isPredicable - Return true if the specified instruction can be predicated.
652 /// By default, this returns true for every instruction with a
653 /// PredicateOperand.
 654 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
 655  if (!MI.isPredicable())
656  return false;
657 
658  if (MI.isBundle())
659  return false;
660 
661  if (!isEligibleForITBlock(&MI))
662  return false;
663 
 664  const ARMFunctionInfo *AFI =
 665  MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
 666 
667  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
668  // In their ARM encoding, they can't be encoded in a conditional form.
670  return false;
671 
672  if (AFI->isThumb2Function()) {
673  if (getSubtarget().restrictIT())
674  return isV8EligibleForIT(&MI);
675  }
676 
677  return true;
678 }
679 
680 namespace llvm {
681 
682 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
683  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
684  const MachineOperand &MO = MI->getOperand(i);
685  if (!MO.isReg() || MO.isUndef() || MO.isUse())
686  continue;
687  if (MO.getReg() != ARM::CPSR)
688  continue;
689  if (!MO.isDead())
690  return false;
691  }
692  // all definitions of CPSR are dead
693  return true;
694 }
695 
696 } // end namespace llvm
697 
698 /// GetInstSize - Return the size of the specified MachineInstr.
699 ///
 700 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
 701  const MachineBasicBlock &MBB = *MI.getParent();
702  const MachineFunction *MF = MBB.getParent();
703  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
704 
705  const MCInstrDesc &MCID = MI.getDesc();
706  if (MCID.getSize())
707  return MCID.getSize();
708 
709  // If this machine instr is an inline asm, measure it.
710  if (MI.getOpcode() == ARM::INLINEASM) {
711  unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
712  if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
713  Size = alignTo(Size, 4);
714  return Size;
715  }
716  unsigned Opc = MI.getOpcode();
717  switch (Opc) {
718  default:
719  // pseudo-instruction sizes are zero.
720  return 0;
721  case TargetOpcode::BUNDLE:
722  return getInstBundleLength(MI);
723  case ARM::MOVi16_ga_pcrel:
724  case ARM::MOVTi16_ga_pcrel:
725  case ARM::t2MOVi16_ga_pcrel:
726  case ARM::t2MOVTi16_ga_pcrel:
727  return 4;
728  case ARM::MOVi32imm:
729  case ARM::t2MOVi32imm:
730  return 8;
731  case ARM::CONSTPOOL_ENTRY:
732  case ARM::JUMPTABLE_INSTS:
733  case ARM::JUMPTABLE_ADDRS:
734  case ARM::JUMPTABLE_TBB:
735  case ARM::JUMPTABLE_TBH:
736  // If this machine instr is a constant pool entry, its size is recorded as
737  // operand #2.
738  return MI.getOperand(2).getImm();
739  case ARM::Int_eh_sjlj_longjmp:
740  return 16;
741  case ARM::tInt_eh_sjlj_longjmp:
742  return 10;
743  case ARM::tInt_WIN_eh_sjlj_longjmp:
744  return 12;
745  case ARM::Int_eh_sjlj_setjmp:
746  case ARM::Int_eh_sjlj_setjmp_nofp:
747  return 20;
748  case ARM::tInt_eh_sjlj_setjmp:
749  case ARM::t2Int_eh_sjlj_setjmp:
750  case ARM::t2Int_eh_sjlj_setjmp_nofp:
751  return 12;
752  case ARM::SPACE:
753  return MI.getOperand(1).getImm();
754  }
755 }
756 
757 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
758  unsigned Size = 0;
761  while (++I != E && I->isInsideBundle()) {
762  assert(!I->isBundle() && "No nested bundle!");
763  Size += getInstSizeInBytes(*I);
764  }
765  return Size;
766 }
767 
 768 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
 769  MachineBasicBlock::iterator I,
 770  unsigned DestReg, bool KillSrc,
771  const ARMSubtarget &Subtarget) const {
772  unsigned Opc = Subtarget.isThumb()
773  ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
774  : ARM::MRS;
775 
776  MachineInstrBuilder MIB =
777  BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
778 
779  // There is only 1 A/R class MRS instruction, and it always refers to
780  // APSR. However, there are lots of other possibilities on M-class cores.
781  if (Subtarget.isMClass())
782  MIB.addImm(0x800);
783 
784  MIB.add(predOps(ARMCC::AL))
785  .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
786 }
787 
 788 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
 789  MachineBasicBlock::iterator I,
 790  unsigned SrcReg, bool KillSrc,
791  const ARMSubtarget &Subtarget) const {
792  unsigned Opc = Subtarget.isThumb()
793  ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
794  : ARM::MSR;
795 
796  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
797 
798  if (Subtarget.isMClass())
799  MIB.addImm(0x800);
800  else
801  MIB.addImm(8);
802 
803  MIB.addReg(SrcReg, getKillRegState(KillSrc))
 804  .add(predOps(ARMCC::AL))
 805  .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
 806 }
807 
 808 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 809  MachineBasicBlock::iterator I,
 810  const DebugLoc &DL, unsigned DestReg,
811  unsigned SrcReg, bool KillSrc) const {
812  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
813  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
814 
815  if (GPRDest && GPRSrc) {
816  BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
817  .addReg(SrcReg, getKillRegState(KillSrc))
 818  .add(predOps(ARMCC::AL))
 819  .add(condCodeOp());
820  return;
821  }
822 
823  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
824  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
825 
826  unsigned Opc = 0;
827  if (SPRDest && SPRSrc)
828  Opc = ARM::VMOVS;
829  else if (GPRDest && SPRSrc)
830  Opc = ARM::VMOVRS;
831  else if (SPRDest && GPRSrc)
832  Opc = ARM::VMOVSR;
833  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
834  Opc = ARM::VMOVD;
835  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
836  Opc = ARM::VORRq;
837 
838  if (Opc) {
839  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
840  MIB.addReg(SrcReg, getKillRegState(KillSrc));
841  if (Opc == ARM::VORRq)
842  MIB.addReg(SrcReg, getKillRegState(KillSrc));
843  MIB.add(predOps(ARMCC::AL));
844  return;
845  }
846 
847  // Handle register classes that require multiple instructions.
848  unsigned BeginIdx = 0;
849  unsigned SubRegs = 0;
850  int Spacing = 1;
851 
852  // Use VORRq when possible.
853  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
854  Opc = ARM::VORRq;
855  BeginIdx = ARM::qsub_0;
856  SubRegs = 2;
857  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
858  Opc = ARM::VORRq;
859  BeginIdx = ARM::qsub_0;
860  SubRegs = 4;
861  // Fall back to VMOVD.
862  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
863  Opc = ARM::VMOVD;
864  BeginIdx = ARM::dsub_0;
865  SubRegs = 2;
866  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
867  Opc = ARM::VMOVD;
868  BeginIdx = ARM::dsub_0;
869  SubRegs = 3;
870  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
871  Opc = ARM::VMOVD;
872  BeginIdx = ARM::dsub_0;
873  SubRegs = 4;
874  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
875  Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
876  BeginIdx = ARM::gsub_0;
877  SubRegs = 2;
878  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
879  Opc = ARM::VMOVD;
880  BeginIdx = ARM::dsub_0;
881  SubRegs = 2;
882  Spacing = 2;
883  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
884  Opc = ARM::VMOVD;
885  BeginIdx = ARM::dsub_0;
886  SubRegs = 3;
887  Spacing = 2;
888  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
889  Opc = ARM::VMOVD;
890  BeginIdx = ARM::dsub_0;
891  SubRegs = 4;
892  Spacing = 2;
893  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
894  Opc = ARM::VMOVS;
895  BeginIdx = ARM::ssub_0;
896  SubRegs = 2;
897  } else if (SrcReg == ARM::CPSR) {
898  copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
899  return;
900  } else if (DestReg == ARM::CPSR) {
901  copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
902  return;
903  }
904 
905  assert(Opc && "Impossible reg-to-reg copy");
906 
 907  const TargetRegisterInfo *TRI = &getRegisterInfo();
 908  MachineInstrBuilder Mov;
 909 
910  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
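 // Illustrative case (hypothetical registers): copying %q0_q1 into %q1_q2.
 // Copying %q0 into %q1 first would clobber the still-needed %q1, so the loop
 // below reverses direction and emits "%q2 = VORRq %q1, %q1" before
 // "%q1 = VORRq %q0, %q0".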
911  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
912  BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
913  Spacing = -Spacing;
914  }
915 #ifndef NDEBUG
916  SmallSet<unsigned, 4> DstRegs;
917 #endif
918  for (unsigned i = 0; i != SubRegs; ++i) {
919  unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
920  unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
921  assert(Dst && Src && "Bad sub-register");
922 #ifndef NDEBUG
923  assert(!DstRegs.count(Src) && "destructive vector copy");
924  DstRegs.insert(Dst);
925 #endif
926  Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
927  // VORR takes two source operands.
928  if (Opc == ARM::VORRq)
929  Mov.addReg(Src);
930  Mov = Mov.add(predOps(ARMCC::AL));
931  // MOVr can set CC.
932  if (Opc == ARM::MOVr)
933  Mov = Mov.add(condCodeOp());
934  }
935  // Add implicit super-register defs and kills to the last instruction.
936  Mov->addRegisterDefined(DestReg, TRI);
937  if (KillSrc)
938  Mov->addRegisterKilled(SrcReg, TRI);
939 }
940 
 941 bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
 942  const MachineOperand *&Src,
943  const MachineOperand *&Dest) const {
 944  // VMOVRRD is also a copy instruction, but it requires
 945  // special handling: it is a more complex form of copy, so
 946  // we do not consider it here. To recognize such
 947  // instructions, the isExtractSubregLike MI interface
 948  // function could be used.
949  // VORRq is considered as a move only if two inputs are
950  // the same register.
951  if (!MI.isMoveReg() ||
952  (MI.getOpcode() == ARM::VORRq &&
953  MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
954  return false;
955  Dest = &MI.getOperand(0);
956  Src = &MI.getOperand(1);
957  return true;
958 }
959 
960 const MachineInstrBuilder &
 961 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
 962  unsigned SubIdx, unsigned State,
963  const TargetRegisterInfo *TRI) const {
964  if (!SubIdx)
965  return MIB.addReg(Reg, State);
966 
968  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
969  return MIB.addReg(Reg, State, SubIdx);
970 }
971 
 972 void ARMBaseInstrInfo::
 973 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 974  unsigned SrcReg, bool isKill, int FI,
975  const TargetRegisterClass *RC,
976  const TargetRegisterInfo *TRI) const {
977  MachineFunction &MF = *MBB.getParent();
978  MachineFrameInfo &MFI = MF.getFrameInfo();
979  unsigned Align = MFI.getObjectAlignment(FI);
980 
983  MFI.getObjectSize(FI), Align);
984 
985  switch (TRI->getSpillSize(*RC)) {
986  case 2:
987  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
988  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
989  .addReg(SrcReg, getKillRegState(isKill))
990  .addFrameIndex(FI)
991  .addImm(0)
992  .addMemOperand(MMO)
993  .add(predOps(ARMCC::AL));
994  } else
995  llvm_unreachable("Unknown reg class!");
996  break;
997  case 4:
998  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
999  BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1000  .addReg(SrcReg, getKillRegState(isKill))
1001  .addFrameIndex(FI)
1002  .addImm(0)
1003  .addMemOperand(MMO)
1004  .add(predOps(ARMCC::AL));
1005  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1006  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1007  .addReg(SrcReg, getKillRegState(isKill))
1008  .addFrameIndex(FI)
1009  .addImm(0)
1010  .addMemOperand(MMO)
1011  .add(predOps(ARMCC::AL));
1012  } else
1013  llvm_unreachable("Unknown reg class!");
1014  break;
1015  case 8:
1016  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1017  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1018  .addReg(SrcReg, getKillRegState(isKill))
1019  .addFrameIndex(FI)
1020  .addImm(0)
1021  .addMemOperand(MMO)
1022  .add(predOps(ARMCC::AL));
1023  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1024  if (Subtarget.hasV5TEOps()) {
1025  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1026  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1027  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1028  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1029  .add(predOps(ARMCC::AL));
1030  } else {
1031  // Fallback to STM instruction, which has existed since the dawn of
1032  // time.
1033  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1034  .addFrameIndex(FI)
1035  .addMemOperand(MMO)
1036  .add(predOps(ARMCC::AL));
1037  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1038  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1039  }
1040  } else
1041  llvm_unreachable("Unknown reg class!");
1042  break;
1043  case 16:
1044  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1045  // Use aligned spills if the stack can be realigned.
1046  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1047  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1048  .addFrameIndex(FI)
1049  .addImm(16)
1050  .addReg(SrcReg, getKillRegState(isKill))
1051  .addMemOperand(MMO)
1052  .add(predOps(ARMCC::AL));
1053  } else {
1054  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1055  .addReg(SrcReg, getKillRegState(isKill))
1056  .addFrameIndex(FI)
1057  .addMemOperand(MMO)
1058  .add(predOps(ARMCC::AL));
1059  }
1060  } else
1061  llvm_unreachable("Unknown reg class!");
1062  break;
1063  case 24:
1064  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1065  // Use aligned spills if the stack can be realigned.
1066  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1067  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1068  .addFrameIndex(FI)
1069  .addImm(16)
1070  .addReg(SrcReg, getKillRegState(isKill))
1071  .addMemOperand(MMO)
1072  .add(predOps(ARMCC::AL));
1073  } else {
1074  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1075  get(ARM::VSTMDIA))
1076  .addFrameIndex(FI)
1077  .add(predOps(ARMCC::AL))
1078  .addMemOperand(MMO);
1079  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1080  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1081  AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1082  }
1083  } else
1084  llvm_unreachable("Unknown reg class!");
1085  break;
1086  case 32:
1087  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1088  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1089  // FIXME: It's possible to only store part of the QQ register if the
1090  // spilled def has a sub-register index.
1091  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1092  .addFrameIndex(FI)
1093  .addImm(16)
1094  .addReg(SrcReg, getKillRegState(isKill))
1095  .addMemOperand(MMO)
1096  .add(predOps(ARMCC::AL));
1097  } else {
1098  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1099  get(ARM::VSTMDIA))
1100  .addFrameIndex(FI)
1101  .add(predOps(ARMCC::AL))
1102  .addMemOperand(MMO);
1103  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1104  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1105  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1106  AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1107  }
1108  } else
1109  llvm_unreachable("Unknown reg class!");
1110  break;
1111  case 64:
1112  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1113  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1114  .addFrameIndex(FI)
1115  .add(predOps(ARMCC::AL))
1116  .addMemOperand(MMO);
1117  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1118  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1119  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1120  MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1121  MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1122  MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1123  MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1124  AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1125  } else
1126  llvm_unreachable("Unknown reg class!");
1127  break;
1128  default:
1129  llvm_unreachable("Unknown reg class!");
1130  }
1131 }
1132 
 1133 unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
 1134  int &FrameIndex) const {
1135  switch (MI.getOpcode()) {
1136  default: break;
1137  case ARM::STRrs:
1138  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1139  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1140  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1141  MI.getOperand(3).getImm() == 0) {
1142  FrameIndex = MI.getOperand(1).getIndex();
1143  return MI.getOperand(0).getReg();
1144  }
1145  break;
1146  case ARM::STRi12:
1147  case ARM::t2STRi12:
1148  case ARM::tSTRspi:
1149  case ARM::VSTRD:
1150  case ARM::VSTRS:
1151  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1152  MI.getOperand(2).getImm() == 0) {
1153  FrameIndex = MI.getOperand(1).getIndex();
1154  return MI.getOperand(0).getReg();
1155  }
1156  break;
1157  case ARM::VST1q64:
1158  case ARM::VST1d64TPseudo:
1159  case ARM::VST1d64QPseudo:
1160  if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1161  FrameIndex = MI.getOperand(0).getIndex();
1162  return MI.getOperand(2).getReg();
1163  }
1164  break;
1165  case ARM::VSTMQIA:
1166  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1167  FrameIndex = MI.getOperand(1).getIndex();
1168  return MI.getOperand(0).getReg();
1169  }
1170  break;
1171  }
1172 
1173  return 0;
1174 }
1175 
 1176 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
 1177  int &FrameIndex) const {
 1178  SmallVector<const MachineMemOperand *, 1> Accesses;
 1179  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
1180  FrameIndex =
1181  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1182  ->getFrameIndex();
1183  return true;
1184  }
1185  return false;
1186 }
1187 
1188 void ARMBaseInstrInfo::
 1189 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 1190  unsigned DestReg, int FI,
1191  const TargetRegisterClass *RC,
1192  const TargetRegisterInfo *TRI) const {
1193  DebugLoc DL;
1194  if (I != MBB.end()) DL = I->getDebugLoc();
1195  MachineFunction &MF = *MBB.getParent();
1196  MachineFrameInfo &MFI = MF.getFrameInfo();
1197  unsigned Align = MFI.getObjectAlignment(FI);
1200  MFI.getObjectSize(FI), Align);
1201 
1202  switch (TRI->getSpillSize(*RC)) {
1203  case 2:
1204  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1205  BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1206  .addFrameIndex(FI)
1207  .addImm(0)
1208  .addMemOperand(MMO)
1209  .add(predOps(ARMCC::AL));
1210  } else
1211  llvm_unreachable("Unknown reg class!");
1212  break;
1213  case 4:
1214  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1215  BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1216  .addFrameIndex(FI)
1217  .addImm(0)
1218  .addMemOperand(MMO)
1219  .add(predOps(ARMCC::AL));
1220  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1221  BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1222  .addFrameIndex(FI)
1223  .addImm(0)
1224  .addMemOperand(MMO)
1225  .add(predOps(ARMCC::AL));
1226  } else
1227  llvm_unreachable("Unknown reg class!");
1228  break;
1229  case 8:
1230  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1231  BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1232  .addFrameIndex(FI)
1233  .addImm(0)
1234  .addMemOperand(MMO)
1235  .add(predOps(ARMCC::AL));
1236  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1237  MachineInstrBuilder MIB;
1238 
1239  if (Subtarget.hasV5TEOps()) {
1240  MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1241  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1242  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1243  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1244  .add(predOps(ARMCC::AL));
1245  } else {
1246  // Fallback to LDM instruction, which has existed since the dawn of
1247  // time.
1248  MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1249  .addFrameIndex(FI)
1250  .addMemOperand(MMO)
1251  .add(predOps(ARMCC::AL));
1252  MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1253  MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1254  }
1255 
1257  MIB.addReg(DestReg, RegState::ImplicitDefine);
1258  } else
1259  llvm_unreachable("Unknown reg class!");
1260  break;
1261  case 16:
1262  if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1263  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1264  BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1265  .addFrameIndex(FI)
1266  .addImm(16)
1267  .addMemOperand(MMO)
1268  .add(predOps(ARMCC::AL));
1269  } else {
1270  BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1271  .addFrameIndex(FI)
1272  .addMemOperand(MMO)
1273  .add(predOps(ARMCC::AL));
1274  }
1275  } else
1276  llvm_unreachable("Unknown reg class!");
1277  break;
1278  case 24:
1279  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1280  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1281  BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1282  .addFrameIndex(FI)
1283  .addImm(16)
1284  .addMemOperand(MMO)
1285  .add(predOps(ARMCC::AL));
1286  } else {
1287  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1288  .addFrameIndex(FI)
1289  .addMemOperand(MMO)
1290  .add(predOps(ARMCC::AL));
1291  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1292  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1293  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1295  MIB.addReg(DestReg, RegState::ImplicitDefine);
1296  }
1297  } else
1298  llvm_unreachable("Unknown reg class!");
1299  break;
1300  case 32:
1301  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1302  if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1303  BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1304  .addFrameIndex(FI)
1305  .addImm(16)
1306  .addMemOperand(MMO)
1307  .add(predOps(ARMCC::AL));
1308  } else {
1309  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1310  .addFrameIndex(FI)
1311  .add(predOps(ARMCC::AL))
1312  .addMemOperand(MMO);
1313  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1314  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1315  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1316  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1318  MIB.addReg(DestReg, RegState::ImplicitDefine);
1319  }
1320  } else
1321  llvm_unreachable("Unknown reg class!");
1322  break;
1323  case 64:
1324  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1325  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1326  .addFrameIndex(FI)
1327  .add(predOps(ARMCC::AL))
1328  .addMemOperand(MMO);
1329  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1330  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1331  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1332  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1333  MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1334  MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1335  MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1336  MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1338  MIB.addReg(DestReg, RegState::ImplicitDefine);
1339  } else
1340  llvm_unreachable("Unknown reg class!");
1341  break;
1342  default:
1343  llvm_unreachable("Unknown regclass!");
1344  }
1345 }
1346 
 1347 unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
 1348  int &FrameIndex) const {
1349  switch (MI.getOpcode()) {
1350  default: break;
1351  case ARM::LDRrs:
1352  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1353  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1354  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1355  MI.getOperand(3).getImm() == 0) {
1356  FrameIndex = MI.getOperand(1).getIndex();
1357  return MI.getOperand(0).getReg();
1358  }
1359  break;
1360  case ARM::LDRi12:
1361  case ARM::t2LDRi12:
1362  case ARM::tLDRspi:
1363  case ARM::VLDRD:
1364  case ARM::VLDRS:
1365  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1366  MI.getOperand(2).getImm() == 0) {
1367  FrameIndex = MI.getOperand(1).getIndex();
1368  return MI.getOperand(0).getReg();
1369  }
1370  break;
1371  case ARM::VLD1q64:
1372  case ARM::VLD1d8TPseudo:
1373  case ARM::VLD1d16TPseudo:
1374  case ARM::VLD1d32TPseudo:
1375  case ARM::VLD1d64TPseudo:
1376  case ARM::VLD1d8QPseudo:
1377  case ARM::VLD1d16QPseudo:
1378  case ARM::VLD1d32QPseudo:
1379  case ARM::VLD1d64QPseudo:
1380  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1381  FrameIndex = MI.getOperand(1).getIndex();
1382  return MI.getOperand(0).getReg();
1383  }
1384  break;
1385  case ARM::VLDMQIA:
1386  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1387  FrameIndex = MI.getOperand(1).getIndex();
1388  return MI.getOperand(0).getReg();
1389  }
1390  break;
1391  }
1392 
1393  return 0;
1394 }
1395 
 1396 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
 1397  int &FrameIndex) const {
 1398  SmallVector<const MachineMemOperand *, 1> Accesses;
 1399  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
1400  FrameIndex =
1401  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1402  ->getFrameIndex();
1403  return true;
1404  }
1405  return false;
1406 }
1407 
 1408 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1409 /// depending on whether the result is used.
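// Illustrative expansion (not from the original source; registers are
// hypothetical): a MEMCPY pseudo whose scratch operands are r4-r7 and whose
// writeback results are live may become
//   LDMIA_UPD r1!, {r4, r5, r6, r7}   ; load from the source pointer
//   STMIA_UPD r0!, {r4, r5, r6, r7}   ; store to the destination pointer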
1410 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1411  bool isThumb1 = Subtarget.isThumb1Only();
1412  bool isThumb2 = Subtarget.isThumb2();
1413  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1414 
1415  DebugLoc dl = MI->getDebugLoc();
1416  MachineBasicBlock *BB = MI->getParent();
1417 
1418  MachineInstrBuilder LDM, STM;
1419  if (isThumb1 || !MI->getOperand(1).isDead()) {
1420  MachineOperand LDWb(MI->getOperand(1));
1421  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1422  : isThumb1 ? ARM::tLDMIA_UPD
1423  : ARM::LDMIA_UPD))
1424  .add(LDWb);
1425  } else {
1426  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1427  }
1428 
1429  if (isThumb1 || !MI->getOperand(0).isDead()) {
1430  MachineOperand STWb(MI->getOperand(0));
1431  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1432  : isThumb1 ? ARM::tSTMIA_UPD
1433  : ARM::STMIA_UPD))
1434  .add(STWb);
1435  } else {
1436  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1437  }
1438 
1439  MachineOperand LDBase(MI->getOperand(3));
1440  LDM.add(LDBase).add(predOps(ARMCC::AL));
1441 
1442  MachineOperand STBase(MI->getOperand(2));
1443  STM.add(STBase).add(predOps(ARMCC::AL));
1444 
1445  // Sort the scratch registers into ascending order.
 1446  const TargetRegisterInfo &TRI = getRegisterInfo();
 1447  SmallVector<unsigned, 6> ScratchRegs;
1448  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1449  ScratchRegs.push_back(MI->getOperand(I).getReg());
1450  llvm::sort(ScratchRegs,
1451  [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1452  return TRI.getEncodingValue(Reg1) <
1453  TRI.getEncodingValue(Reg2);
1454  });
1455 
1456  for (const auto &Reg : ScratchRegs) {
1457  LDM.addReg(Reg, RegState::Define);
1458  STM.addReg(Reg, RegState::Kill);
1459  }
1460 
1461  BB->erase(MI);
1462 }
1463 
 1464 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
 1465  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1466  assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
1467  "LOAD_STACK_GUARD currently supported only for MachO.");
1468  expandLoadStackGuard(MI);
1469  MI.getParent()->erase(MI);
1470  return true;
1471  }
1472 
1473  if (MI.getOpcode() == ARM::MEMCPY) {
1474  expandMEMCPY(MI);
1475  return true;
1476  }
1477 
1478  // This hook gets to expand COPY instructions before they become
1479  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1480  // widened to VMOVD. We prefer the VMOVD when possible because it may be
1481  // changed into a VORR that can go down the NEON pipeline.
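 // Sketch of the rewrite performed below (illustrative registers): since %s2
 // is the low half of %d1, a copy "%s0 = COPY killed %s2" becomes roughly
 // "%d0 = VMOVD undef %d1, implicit killed %s2", so the whole D-register
 // moves while only the original S-source is marked killed.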
1482  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
1483  return false;
1484 
1485  // Look for a copy between even S-registers. That is where we keep floats
1486  // when using NEON v2f32 instructions for f32 arithmetic.
1487  unsigned DstRegS = MI.getOperand(0).getReg();
1488  unsigned SrcRegS = MI.getOperand(1).getReg();
1489  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1490  return false;
1491 
 1492  const TargetRegisterInfo *TRI = &getRegisterInfo();
 1493  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1494  &ARM::DPRRegClass);
1495  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1496  &ARM::DPRRegClass);
1497  if (!DstRegD || !SrcRegD)
1498  return false;
1499 
1500  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1501  // legal if the COPY already defines the full DstRegD, and it isn't a
1502  // sub-register insertion.
1503  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1504  return false;
1505 
1506  // A dead copy shouldn't show up here, but reject it just in case.
1507  if (MI.getOperand(0).isDead())
1508  return false;
1509 
1510  // All clear, widen the COPY.
1511  LLVM_DEBUG(dbgs() << "widening: " << MI);
1512  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1513 
1514  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1515  // or some other super-register.
1516  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1517  if (ImpDefIdx != -1)
1518  MI.RemoveOperand(ImpDefIdx);
1519 
1520  // Change the opcode and operands.
1521  MI.setDesc(get(ARM::VMOVD));
1522  MI.getOperand(0).setReg(DstRegD);
1523  MI.getOperand(1).setReg(SrcRegD);
1524  MIB.add(predOps(ARMCC::AL));
1525 
1526  // We are now reading SrcRegD instead of SrcRegS. This may upset the
1527  // register scavenger and machine verifier, so we need to indicate that we
1528  // are reading an undefined value from SrcRegD, but a proper value from
1529  // SrcRegS.
1530  MI.getOperand(1).setIsUndef();
1531  MIB.addReg(SrcRegS, RegState::Implicit);
1532 
1533  // SrcRegD may actually contain an unrelated value in the ssub_1
1534  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1535  if (MI.getOperand(1).isKill()) {
1536  MI.getOperand(1).setIsKill(false);
1537  MI.addRegisterKilled(SrcRegS, TRI, true);
1538  }
1539 
1540  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1541  return true;
1542 }
1543 
1544 /// Create a copy of a const pool value. Update CPI to the new index and return
1545 /// the label UID.
1546 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
 1547  MachineConstantPool *MCP = MF.getConstantPool();
 1548  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 1549 
1550  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1551  assert(MCPE.isMachineConstantPoolEntry() &&
1552  "Expecting a machine constantpool entry!");
1553  ARMConstantPoolValue *ACPV =
1554  static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1555 
1556  unsigned PCLabelId = AFI->createPICLabelUId();
1557  ARMConstantPoolValue *NewCPV = nullptr;
1558 
1559  // FIXME: The below assumes PIC relocation model and that the function
1560  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1561  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1562  // instructions, so that's probably OK, but is PIC always correct when
1563  // we get here?
1564  if (ACPV->isGlobalValue())
1566  cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1567  4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1568  else if (ACPV->isExtSymbol())
1569  NewCPV = ARMConstantPoolSymbol::
1570  Create(MF.getFunction().getContext(),
1571  cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1572  else if (ACPV->isBlockAddress())
1573  NewCPV = ARMConstantPoolConstant::
1574  Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1576  else if (ACPV->isLSDA())
1577  NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1578  ARMCP::CPLSDA, 4);
1579  else if (ACPV->isMachineBasicBlock())
1580  NewCPV = ARMConstantPoolMBB::
1581  Create(MF.getFunction().getContext(),
1582  cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1583  else
1584  llvm_unreachable("Unexpected ARM constantpool value type!!");
1585  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
1586  return PCLabelId;
1587 }
1588 
 1589 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
 1590  MachineBasicBlock::iterator I,
 1591  unsigned DestReg, unsigned SubIdx,
1592  const MachineInstr &Orig,
1593  const TargetRegisterInfo &TRI) const {
1594  unsigned Opcode = Orig.getOpcode();
1595  switch (Opcode) {
1596  default: {
1597  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1598  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1599  MBB.insert(I, MI);
1600  break;
1601  }
1602  case ARM::tLDRpci_pic:
1603  case ARM::t2LDRpci_pic: {
1604  MachineFunction &MF = *MBB.getParent();
1605  unsigned CPI = Orig.getOperand(1).getIndex();
1606  unsigned PCLabelId = duplicateCPV(MF, CPI);
1607  BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1608  .addConstantPoolIndex(CPI)
1609  .addImm(PCLabelId)
1610  .cloneMemRefs(Orig);
1611  break;
1612  }
1613  }
1614 }
1615 
1616 MachineInstr &
 1617 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
 1618  MachineBasicBlock::iterator InsertBefore,
1619  const MachineInstr &Orig) const {
1620  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
 1621  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
 1622  for (;;) {
1623  switch (I->getOpcode()) {
1624  case ARM::tLDRpci_pic:
1625  case ARM::t2LDRpci_pic: {
1626  MachineFunction &MF = *MBB.getParent();
1627  unsigned CPI = I->getOperand(1).getIndex();
1628  unsigned PCLabelId = duplicateCPV(MF, CPI);
1629  I->getOperand(1).setIndex(CPI);
1630  I->getOperand(2).setImm(PCLabelId);
1631  break;
1632  }
1633  }
1634  if (!I->isBundledWithSucc())
1635  break;
1636  ++I;
1637  }
1638  return Cloned;
1639 }
1640 
 1641 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
 1642  const MachineInstr &MI1,
1643  const MachineRegisterInfo *MRI) const {
1644  unsigned Opcode = MI0.getOpcode();
1645  if (Opcode == ARM::t2LDRpci ||
1646  Opcode == ARM::t2LDRpci_pic ||
1647  Opcode == ARM::tLDRpci ||
1648  Opcode == ARM::tLDRpci_pic ||
1649  Opcode == ARM::LDRLIT_ga_pcrel ||
1650  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1651  Opcode == ARM::tLDRLIT_ga_pcrel ||
1652  Opcode == ARM::MOV_ga_pcrel ||
1653  Opcode == ARM::MOV_ga_pcrel_ldr ||
1654  Opcode == ARM::t2MOV_ga_pcrel) {
1655  if (MI1.getOpcode() != Opcode)
1656  return false;
1657  if (MI0.getNumOperands() != MI1.getNumOperands())
1658  return false;
1659 
1660  const MachineOperand &MO0 = MI0.getOperand(1);
1661  const MachineOperand &MO1 = MI1.getOperand(1);
1662  if (MO0.getOffset() != MO1.getOffset())
1663  return false;
1664 
1665  if (Opcode == ARM::LDRLIT_ga_pcrel ||
1666  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1667  Opcode == ARM::tLDRLIT_ga_pcrel ||
1668  Opcode == ARM::MOV_ga_pcrel ||
1669  Opcode == ARM::MOV_ga_pcrel_ldr ||
1670  Opcode == ARM::t2MOV_ga_pcrel)
1671  // Ignore the PC labels.
1672  return MO0.getGlobal() == MO1.getGlobal();
1673 
1674  const MachineFunction *MF = MI0.getParent()->getParent();
1675  const MachineConstantPool *MCP = MF->getConstantPool();
1676  int CPI0 = MO0.getIndex();
1677  int CPI1 = MO1.getIndex();
1678  const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1679  const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1680  bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1681  bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1682  if (isARMCP0 && isARMCP1) {
1683  ARMConstantPoolValue *ACPV0 =
1684  static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1685  ARMConstantPoolValue *ACPV1 =
1686  static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1687  return ACPV0->hasSameValue(ACPV1);
1688  } else if (!isARMCP0 && !isARMCP1) {
1689  return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1690  }
1691  return false;
1692  } else if (Opcode == ARM::PICLDR) {
1693  if (MI1.getOpcode() != Opcode)
1694  return false;
1695  if (MI0.getNumOperands() != MI1.getNumOperands())
1696  return false;
1697 
1698  unsigned Addr0 = MI0.getOperand(1).getReg();
1699  unsigned Addr1 = MI1.getOperand(1).getReg();
1700  if (Addr0 != Addr1) {
1701  if (!MRI ||
1704  return false;
1705 
1706  // This assumes SSA form.
1707  MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1708  MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1709  // Check if the loaded value, e.g. a constantpool of a global address, are
1710  // the same.
1711  if (!produceSameValue(*Def0, *Def1, MRI))
1712  return false;
1713  }
1714 
1715  for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1716  // %12 = PICLDR %11, 0, 14, %noreg
1717  const MachineOperand &MO0 = MI0.getOperand(i);
1718  const MachineOperand &MO1 = MI1.getOperand(i);
1719  if (!MO0.isIdenticalTo(MO1))
1720  return false;
1721  }
1722  return true;
1723  }
1724 
 1725  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
 1726 }
1727 
1728 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1729 /// determine if two loads are loading from the same base address. It should
 1730 /// only return true if the base pointers are the same and the only difference
1731 /// between the two addresses is the offset. It also returns the offsets by
1732 /// reference.
1733 ///
1734 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1735 /// is permanently disabled.
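/// Example (illustrative, not from the original source): two LDRi12 nodes
/// that share the same chain and base register, with immediate offsets 0 and
/// 8, match here and return Offset1 = 0 and Offset2 = 8; the scheduler can
/// then consult shouldScheduleLoadsNear to decide whether to keep them
/// adjacent.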
 1736 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
 1737  int64_t &Offset1,
1738  int64_t &Offset2) const {
1739  // Don't worry about Thumb: just ARM and Thumb2.
1740  if (Subtarget.isThumb1Only()) return false;
1741 
1742  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1743  return false;
1744 
1745  switch (Load1->getMachineOpcode()) {
1746  default:
1747  return false;
1748  case ARM::LDRi12:
1749  case ARM::LDRBi12:
1750  case ARM::LDRD:
1751  case ARM::LDRH:
1752  case ARM::LDRSB:
1753  case ARM::LDRSH:
1754  case ARM::VLDRD:
1755  case ARM::VLDRS:
1756  case ARM::t2LDRi8:
1757  case ARM::t2LDRBi8:
1758  case ARM::t2LDRDi8:
1759  case ARM::t2LDRSHi8:
1760  case ARM::t2LDRi12:
1761  case ARM::t2LDRBi12:
1762  case ARM::t2LDRSHi12:
1763  break;
1764  }
1765 
1766  switch (Load2->getMachineOpcode()) {
1767  default:
1768  return false;
1769  case ARM::LDRi12:
1770  case ARM::LDRBi12:
1771  case ARM::LDRD:
1772  case ARM::LDRH:
1773  case ARM::LDRSB:
1774  case ARM::LDRSH:
1775  case ARM::VLDRD:
1776  case ARM::VLDRS:
1777  case ARM::t2LDRi8:
1778  case ARM::t2LDRBi8:
1779  case ARM::t2LDRSHi8:
1780  case ARM::t2LDRi12:
1781  case ARM::t2LDRBi12:
1782  case ARM::t2LDRSHi12:
1783  break;
1784  }
1785 
1786  // Check if base addresses and chain operands match.
1787  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1788  Load1->getOperand(4) != Load2->getOperand(4))
1789  return false;
1790 
1791  // Index should be Reg0.
1792  if (Load1->getOperand(3) != Load2->getOperand(3))
1793  return false;
1794 
1795  // Determine the offsets.
1796  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1797  isa<ConstantSDNode>(Load2->getOperand(1))) {
1798  Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1799  Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1800  return true;
1801  }
1802 
1803  return false;
1804 }
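// Illustrative sketch (not part of the original source; the SDNode operand
// layout shown is assumed): two selected loads such as
//   t10: i32 = LDRi12 t5, 4, pred:14, pred:%noreg
//   t11: i32 = LDRi12 t5, 8, pred:14, pred:%noreg
// share the same base and chain operands and differ only in their immediate
// offsets, so areLoadsFromSameBasePtr would return true with Offset1 = 4 and
// Offset2 = 8.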
1805 
1806 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1807 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1808  /// be scheduled together. On some targets, if two loads are loading from
1809 /// addresses in the same cache line, it's better if they are scheduled
1810 /// together. This function takes two integers that represent the load offsets
1811 /// from the common base address. It returns true if it decides it's desirable
1812 /// to schedule the two loads together. "NumLoads" is the number of loads that
1813 /// have already been scheduled after Load1.
1814 ///
1815 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1816 /// is permanently disabled.
1817 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1818  int64_t Offset1, int64_t Offset2,
1819  unsigned NumLoads) const {
1820  // Don't worry about Thumb: just ARM and Thumb2.
1821  if (Subtarget.isThumb1Only()) return false;
1822 
1823  assert(Offset2 > Offset1);
1824 
1825  if ((Offset2 - Offset1) / 8 > 64)
1826  return false;
1827 
1828  // Check if the machine opcodes are different. If they are different
1829  // then we consider them to not be of the same base address,
1830  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
1831  // In this case, they are considered to be the same because they are different
1832  // encoding forms of the same basic instruction.
1833  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1834  !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1835  Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1836  (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1837  Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1838  return false; // FIXME: overly conservative?
1839 
1840  // Four loads in a row should be sufficient.
1841  if (NumLoads >= 3)
1842  return false;
1843 
1844  return true;
1845 }
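// Worked example (illustrative only): with Offset1 = 8 and Offset2 = 16 the
// distance check passes ((16 - 8) / 8 = 1 <= 64); if the two loads use the
// same opcode (or the t2LDRBi8 / t2LDRBi12 pair) and fewer than three loads
// have already been scheduled after Load1, the hook returns true. A pair
// whose offsets differ by 520 or more fails the distance check (520 / 8 = 65).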
1846 
1847 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1848  const MachineBasicBlock *MBB,
1849  const MachineFunction &MF) const {
1850  // Debug info is never a scheduling boundary. It's necessary to be explicit
1851  // due to the special treatment of IT instructions below, otherwise a
1852  // dbg_value followed by an IT will result in the IT instruction being
1853  // considered a scheduling hazard, which is wrong. It should be the actual
1854  // instruction preceding the dbg_value instruction(s), just like it is
1855  // when debug info is not present.
1856  if (MI.isDebugInstr())
1857  return false;
1858 
1859  // Terminators and labels can't be scheduled around.
1860  if (MI.isTerminator() || MI.isPosition())
1861  return true;
1862 
1863  // Treat the start of the IT block as a scheduling boundary, but schedule
1864  // t2IT along with all instructions following it.
1865  // FIXME: This is a big hammer. But the alternative is to add all potential
1866  // true and anti dependencies to IT block instructions as implicit operands
1867  // to the t2IT instruction. The added compile time and complexity does not
1868  // seem worth it.
1869  MachineBasicBlock::const_iterator I = MI;
1870  // Make sure to skip any debug instructions
1871  while (++I != MBB->end() && I->isDebugInstr())
1872  ;
1873  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1874  return true;
1875 
1876  // Don't attempt to schedule around any instruction that defines
1877  // a stack-oriented pointer, as it's unlikely to be profitable. This
1878  // saves compile time, because it doesn't require every single
1879  // stack slot reference to depend on the instruction that does the
1880  // modification.
1881  // Calls don't actually change the stack pointer, even if they have imp-defs.
1882  // No ARM calling conventions change the stack pointer. (X86 calling
1883  // conventions sometimes do).
1884  if (!MI.isCall() && MI.definesRegister(ARM::SP))
1885  return true;
1886 
1887  return false;
1888 }
1889 
1890 bool ARMBaseInstrInfo::
1891 isProfitableToIfCvt(MachineBasicBlock &MBB,
1892  unsigned NumCycles, unsigned ExtraPredCycles,
1893  BranchProbability Probability) const {
1894  if (!NumCycles)
1895  return false;
1896 
1897  // If we are optimizing for size, see if the branch in the predecessor can be
1898  // lowered to cbn?z by the constant island lowering pass, and return false if
1899  // so. This results in a shorter instruction sequence.
1900  if (MBB.getParent()->getFunction().optForSize()) {
1901  MachineBasicBlock *Pred = *MBB.pred_begin();
1902  if (!Pred->empty()) {
1903  MachineInstr *LastMI = &*Pred->rbegin();
1904  if (LastMI->getOpcode() == ARM::t2Bcc) {
1905  MachineBasicBlock::iterator CmpMI = LastMI;
1906  if (CmpMI != Pred->begin()) {
1907  --CmpMI;
1908  if (CmpMI->getOpcode() == ARM::tCMPi8 ||
1909  CmpMI->getOpcode() == ARM::t2CMPri) {
1910  unsigned Reg = CmpMI->getOperand(0).getReg();
1911  unsigned PredReg = 0;
1912  ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
1913  if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
1914  isARMLowRegister(Reg))
1915  return false;
1916  }
1917  }
1918  }
1919  }
1920  }
1921  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1922  MBB, 0, 0, Probability);
1923 }
1924 
1925 bool ARMBaseInstrInfo::
1926 isProfitableToIfCvt(MachineBasicBlock &TBB,
1927  unsigned TCycles, unsigned TExtra,
1928  MachineBasicBlock &FBB,
1929  unsigned FCycles, unsigned FExtra,
1930  BranchProbability Probability) const {
1931  if (!TCycles)
1932  return false;
1933 
1934  // Attempt to estimate the relative costs of predication versus branching.
1935  // Here we scale up each component of UnpredCost to avoid precision issues when
1936  // scaling TCycles/FCycles by Probability.
1937  const unsigned ScalingUpFactor = 1024;
1938 
1939  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1940  unsigned UnpredCost;
1941  if (!Subtarget.hasBranchPredictor()) {
1942  // When we don't have a branch predictor it's always cheaper to not take a
1943  // branch than take it, so we have to take that into account.
1944  unsigned NotTakenBranchCost = 1;
1945  unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1946  unsigned TUnpredCycles, FUnpredCycles;
1947  if (!FCycles) {
1948  // Triangle: TBB is the fallthrough
1949  TUnpredCycles = TCycles + NotTakenBranchCost;
1950  FUnpredCycles = TakenBranchCost;
1951  } else {
1952  // Diamond: TBB is the block that is branched to, FBB is the fallthrough
1953  TUnpredCycles = TCycles + TakenBranchCost;
1954  FUnpredCycles = FCycles + NotTakenBranchCost;
1955  // The branch at the end of FBB will disappear when it's predicated, so
1956  // discount it from PredCost.
1957  PredCost -= 1 * ScalingUpFactor;
1958  }
1959  // The total cost is the cost of each path scaled by their probabilities.
1960  unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
1961  unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
1962  UnpredCost = TUnpredCost + FUnpredCost;
1963  // When predicating, assume that the first IT can be folded away but later
1964  // ones cost one cycle each.
1965  if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
1966  PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
1967  }
1968  } else {
1969  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
1970  unsigned FUnpredCost =
1971  Probability.getCompl().scale(FCycles * ScalingUpFactor);
1972  UnpredCost = TUnpredCost + FUnpredCost;
1973  UnpredCost += 1 * ScalingUpFactor; // The branch itself
1974  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
1975  }
1976 
1977  return PredCost <= UnpredCost;
1978 }
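// Worked example (illustrative, assuming a subtarget with a branch predictor
// and a misprediction penalty of 10): for a diamond with TCycles = FCycles = 2,
// TExtra = FExtra = 0 and Probability = 1/2,
//   PredCost   = (2 + 2) * 1024 = 4096
//   UnpredCost = 1024 + 1024 (scaled paths) + 1024 (branch) + 1024 (penalty/10)
//              = 4096
// so PredCost <= UnpredCost and if-conversion is judged profitable. Larger
// blocks or a smaller misprediction penalty make predication less attractive.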
1979 
1980 bool
1981 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1982  MachineBasicBlock &FMBB) const {
1983  // Reduce false anti-dependencies to let the target's out-of-order execution
1984  // engine do its thing.
1985  return Subtarget.isProfitableToUnpredicate();
1986 }
1987 
1988 /// getInstrPredicate - If instruction is predicated, returns its predicate
1989 /// condition, otherwise returns AL. It also returns the condition code
1990 /// register by reference.
1991 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
1992  unsigned &PredReg) {
1993  int PIdx = MI.findFirstPredOperandIdx();
1994  if (PIdx == -1) {
1995  PredReg = 0;
1996  return ARMCC::AL;
1997  }
1998 
1999  PredReg = MI.getOperand(PIdx+1).getReg();
2000  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2001 }
2002 
2003 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2004  if (Opc == ARM::B)
2005  return ARM::Bcc;
2006  if (Opc == ARM::tB)
2007  return ARM::tBcc;
2008  if (Opc == ARM::t2B)
2009  return ARM::t2Bcc;
2010 
2011  llvm_unreachable("Unknown unconditional branch opcode!");
2012 }
2013 
2014 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2015  bool NewMI,
2016  unsigned OpIdx1,
2017  unsigned OpIdx2) const {
2018  switch (MI.getOpcode()) {
2019  case ARM::MOVCCr:
2020  case ARM::t2MOVCCr: {
2021  // MOVCC can be commuted by inverting the condition.
2022  unsigned PredReg = 0;
2023  ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2024  // MOVCC AL can't be inverted. Shouldn't happen.
2025  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2026  return nullptr;
2027  MachineInstr *CommutedMI =
2028  TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2029  if (!CommutedMI)
2030  return nullptr;
2031  // After swapping the MOVCC operands, also invert the condition.
2032  CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2033  .setImm(ARMCC::getOppositeCondition(CC));
2034  return CommutedMI;
2035  }
2036  }
2037  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2038 }
2039 
2040 /// Identify instructions that can be folded into a MOVCC instruction, and
2041 /// return the defining instruction.
2042 static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
2043  const MachineRegisterInfo &MRI,
2044  const TargetInstrInfo *TII) {
2045  if (!TargetRegisterInfo::isVirtualRegister(Reg))
2046  return nullptr;
2047  if (!MRI.hasOneNonDBGUse(Reg))
2048  return nullptr;
2049  MachineInstr *MI = MRI.getVRegDef(Reg);
2050  if (!MI)
2051  return nullptr;
2052  // MI is folded into the MOVCC by predicating it.
2053  if (!MI->isPredicable())
2054  return nullptr;
2055  // Check if MI has any non-dead defs or physreg uses. This also detects
2056  // predicated instructions which will be reading CPSR.
2057  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
2058  const MachineOperand &MO = MI->getOperand(i);
2059  // Reject frame index operands, PEI can't handle the predicated pseudos.
2060  if (MO.isFI() || MO.isCPI() || MO.isJTI())
2061  return nullptr;
2062  if (!MO.isReg())
2063  continue;
2064  // MI can't have any tied operands, that would conflict with predication.
2065  if (MO.isTied())
2066  return nullptr;
2067  if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
2068  return nullptr;
2069  if (MO.isDef() && !MO.isDead())
2070  return nullptr;
2071  }
2072  bool DontMoveAcrossStores = true;
2073  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2074  return nullptr;
2075  return MI;
2076 }
2077 
2078 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2079  SmallVectorImpl<MachineOperand> &Cond,
2080  unsigned &TrueOp, unsigned &FalseOp,
2081  bool &Optimizable) const {
2082  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2083  "Unknown select instruction");
2084  // MOVCC operands:
2085  // 0: Def.
2086  // 1: True use.
2087  // 2: False use.
2088  // 3: Condition code.
2089  // 4: CPSR use.
2090  TrueOp = 1;
2091  FalseOp = 2;
2092  Cond.push_back(MI.getOperand(3));
2093  Cond.push_back(MI.getOperand(4));
2094  // We can always fold a def.
2095  Optimizable = true;
2096  return false;
2097 }
2098 
2099 MachineInstr *
2100 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2101  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2102  bool PreferFalse) const {
2103  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2104  "Unknown select instruction");
2105  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2106  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2107  bool Invert = !DefMI;
2108  if (!DefMI)
2109  DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2110  if (!DefMI)
2111  return nullptr;
2112 
2113  // Find new register class to use.
2114  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2115  unsigned DestReg = MI.getOperand(0).getReg();
2116  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
2117  if (!MRI.constrainRegClass(DestReg, PreviousClass))
2118  return nullptr;
2119 
2120  // Create a new predicated version of DefMI.
2121  // Rfalse is the first use.
2122  MachineInstrBuilder NewMI =
2123  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2124 
2125  // Copy all the DefMI operands, excluding its (null) predicate.
2126  const MCInstrDesc &DefDesc = DefMI->getDesc();
2127  for (unsigned i = 1, e = DefDesc.getNumOperands();
2128  i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2129  NewMI.add(DefMI->getOperand(i));
2130 
2131  unsigned CondCode = MI.getOperand(3).getImm();
2132  if (Invert)
2133  NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2134  else
2135  NewMI.addImm(CondCode);
2136  NewMI.add(MI.getOperand(4));
2137 
2138  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2139  if (NewMI->hasOptionalDef())
2140  NewMI.add(condCodeOp());
2141 
2142  // The output register value when the predicate is false is an implicit
2143  // register operand tied to the first def.
2144  // The tie makes the register allocator ensure the FalseReg is allocated the
2145  // same register as operand 0.
2146  FalseReg.setImplicit();
2147  NewMI.add(FalseReg);
2148  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2149 
2150  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2151  SeenMIs.insert(NewMI);
2152  SeenMIs.erase(DefMI);
2153 
2154  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2155  // DefMI would be invalid when transferred inside the loop. Checking for a
2156  // loop is expensive, but at least remove kill flags if they are in different
2157  // BBs.
2158  if (DefMI->getParent() != MI.getParent())
2159  NewMI->clearKillInfo();
2160 
2161  // The caller will erase MI, but not DefMI.
2162  DefMI->eraseFromParent();
2163  return NewMI;
2164 }
2165 
2166 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2167 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2168 /// def operand.
2169 ///
2170 /// This will go away once we can teach tblgen how to set the optional CPSR def
2171 /// operand itself.
2172 struct AddSubFlagsOpcodePair {
2173  uint16_t PseudoOpc;
2174  uint16_t MachineOpc;
2175 };
2176 
2177 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2178  {ARM::ADDSri, ARM::ADDri},
2179  {ARM::ADDSrr, ARM::ADDrr},
2180  {ARM::ADDSrsi, ARM::ADDrsi},
2181  {ARM::ADDSrsr, ARM::ADDrsr},
2182 
2183  {ARM::SUBSri, ARM::SUBri},
2184  {ARM::SUBSrr, ARM::SUBrr},
2185  {ARM::SUBSrsi, ARM::SUBrsi},
2186  {ARM::SUBSrsr, ARM::SUBrsr},
2187 
2188  {ARM::RSBSri, ARM::RSBri},
2189  {ARM::RSBSrsi, ARM::RSBrsi},
2190  {ARM::RSBSrsr, ARM::RSBrsr},
2191 
2192  {ARM::tADDSi3, ARM::tADDi3},
2193  {ARM::tADDSi8, ARM::tADDi8},
2194  {ARM::tADDSrr, ARM::tADDrr},
2195  {ARM::tADCS, ARM::tADC},
2196 
2197  {ARM::tSUBSi3, ARM::tSUBi3},
2198  {ARM::tSUBSi8, ARM::tSUBi8},
2199  {ARM::tSUBSrr, ARM::tSUBrr},
2200  {ARM::tSBCS, ARM::tSBC},
2201  {ARM::tRSBS, ARM::tRSB},
2202 
2203  {ARM::t2ADDSri, ARM::t2ADDri},
2204  {ARM::t2ADDSrr, ARM::t2ADDrr},
2205  {ARM::t2ADDSrs, ARM::t2ADDrs},
2206 
2207  {ARM::t2SUBSri, ARM::t2SUBri},
2208  {ARM::t2SUBSrr, ARM::t2SUBrr},
2209  {ARM::t2SUBSrs, ARM::t2SUBrs},
2210 
2211  {ARM::t2RSBSri, ARM::t2RSBri},
2212  {ARM::t2RSBSrs, ARM::t2RSBrs},
2213 };
2214 
2215 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2216  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2217  if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2218  return AddSubFlagsOpcodeMap[i].MachineOpc;
2219  return 0;
2220 }
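// Usage sketch (illustrative): a pass expanding these flag-setting pseudos can
// simply query the table, e.g.
//   unsigned NewOpc = convertAddSubFlagsOpcode(ARM::t2ADDSri); // ARM::t2ADDri
//   if (NewOpc) { /* rewrite the opcode and mark CPSR as defined */ }
// A return value of 0 means the opcode is not one of the mapped pseudos.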
2221 
2222 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2223  MachineBasicBlock::iterator &MBBI,
2224  const DebugLoc &dl, unsigned DestReg,
2225  unsigned BaseReg, int NumBytes,
2226  ARMCC::CondCodes Pred, unsigned PredReg,
2227  const ARMBaseInstrInfo &TII,
2228  unsigned MIFlags) {
2229  if (NumBytes == 0 && DestReg != BaseReg) {
2230  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2231  .addReg(BaseReg, RegState::Kill)
2232  .add(predOps(Pred, PredReg))
2233  .add(condCodeOp())
2234  .setMIFlags(MIFlags);
2235  return;
2236  }
2237 
2238  bool isSub = NumBytes < 0;
2239  if (isSub) NumBytes = -NumBytes;
2240 
2241  while (NumBytes) {
2242  unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2243  unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2244  assert(ThisVal && "Didn't extract field correctly");
2245 
2246  // We will handle these bits from the offset; clear them.
2247  NumBytes &= ~ThisVal;
2248 
2249  assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2250 
2251  // Build the new ADD / SUB.
2252  unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2253  BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2254  .addReg(BaseReg, RegState::Kill)
2255  .addImm(ThisVal)
2256  .add(predOps(Pred, PredReg))
2257  .add(condCodeOp())
2258  .setMIFlags(MIFlags);
2259  BaseReg = DestReg;
2260  }
2261 }
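// Worked example (illustrative): NumBytes = 257 (0x101) is not a valid ARM
// modified immediate, so the loop above splits it into two rotated 8-bit
// chunks and emits roughly
//   ADDri DestReg, BaseReg, 1
//   ADDri DestReg, DestReg, 256
// The exact chunk order follows getSOImmValRotate; the point is that each
// emitted ADD/SUB carries an immediate that encodes as a rotated 8-bit value.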
2262 
2263 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2264  MachineFunction &MF, MachineInstr *MI,
2265  unsigned NumBytes) {
2266  // This optimisation potentially adds lots of load and store
2267  // micro-operations, it's only really a great benefit to code-size.
2268  if (!Subtarget.optForMinSize())
2269  return false;
2270 
2271  // If only one register is pushed/popped, LLVM can use an LDR/STR
2272  // instead. We can't modify those so make sure we're dealing with an
2273  // instruction we understand.
2274  bool IsPop = isPopOpcode(MI->getOpcode());
2275  bool IsPush = isPushOpcode(MI->getOpcode());
2276  if (!IsPush && !IsPop)
2277  return false;
2278 
2279  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2280  MI->getOpcode() == ARM::VLDMDIA_UPD;
2281  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2282  MI->getOpcode() == ARM::tPOP ||
2283  MI->getOpcode() == ARM::tPOP_RET;
2284 
2285  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2286  MI->getOperand(1).getReg() == ARM::SP)) &&
2287  "trying to fold sp update into non-sp-updating push/pop");
2288 
2289  // The VFP push & pop act on D-registers, so we can only correctly fold an
2290  // adjustment that is a multiple of 8 bytes. Similarly, each rN is 4 bytes.
2291  // Don't try if this is violated.
2292  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2293  return false;
2294 
2295  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2296  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2297  int RegListIdx = IsT1PushPop ? 2 : 4;
2298 
2299  // Calculate the space we'll need in terms of registers.
2300  unsigned RegsNeeded;
2301  const TargetRegisterClass *RegClass;
2302  if (IsVFPPushPop) {
2303  RegsNeeded = NumBytes / 8;
2304  RegClass = &ARM::DPRRegClass;
2305  } else {
2306  RegsNeeded = NumBytes / 4;
2307  RegClass = &ARM::GPRRegClass;
2308  }
2309 
2310  // We're going to have to strip all list operands off before
2311  // re-adding them since the order matters, so save the existing ones
2312  // for later.
2313  SmallVector<MachineOperand, 4> RegList;
2314 
2315  // We're also going to need the first register transferred by this
2316  // instruction, which won't necessarily be the first register in the list.
2317  unsigned FirstRegEnc = -1;
2318 
2320  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2321  MachineOperand &MO = MI->getOperand(i);
2322  RegList.push_back(MO);
2323 
2324  if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2325  FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2326  }
2327 
2328  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2329 
2330  // Now try to find enough space in the reglist to allocate NumBytes.
2331  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2332  --CurRegEnc) {
2333  unsigned CurReg = RegClass->getRegister(CurRegEnc);
2334  if (!IsPop) {
2335  // Pushing any register is completely harmless, mark the register involved
2336  // as undef since we don't care about its value and must not restore it
2337  // during stack unwinding.
2338  RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2339  false, false, true));
2340  --RegsNeeded;
2341  continue;
2342  }
2343 
2344  // However, we can only pop an extra register if it's not live. For
2345  // registers live within the function we might clobber a return value
2346  // register; the other way a register can be live here is if it's
2347  // callee-saved.
2348  if (isCalleeSavedRegister(CurReg, CSRegs) ||
2349  MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2350  MachineBasicBlock::LQR_Dead) {
2351  // VFP pops don't allow holes in the register list, so any skip is fatal
2352  // for our transformation. GPR pops do, so we should just keep looking.
2353  if (IsVFPPushPop)
2354  return false;
2355  else
2356  continue;
2357  }
2358 
2359  // Mark the unimportant registers as <def,dead> in the POP.
2360  RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2361  true));
2362  --RegsNeeded;
2363  }
2364 
2365  if (RegsNeeded > 0)
2366  return false;
2367 
2368  // Finally we know we can profitably perform the optimisation so go
2369  // ahead: strip all existing registers off and add them back again
2370  // in the right order.
2371  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2372  MI->RemoveOperand(i);
2373 
2374  // Add the complete list back in.
2375  MachineInstrBuilder MIB(MF, &*MI);
2376  for (int i = RegList.size() - 1; i >= 0; --i)
2377  MIB.add(RegList[i]);
2378 
2379  return true;
2380 }
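// Illustrative sketch (assumed MIR, minsize only): a 4-byte SP decrement
// immediately before
//   tPUSH 14, $noreg, killed $r4, killed $lr
// can be folded by pushing one extra scratch register, e.g.
//   tPUSH 14, $noreg, undef $r3, killed $r4, killed $lr
// so the separate "sub sp, sp, #4" disappears. Which register gets added
// depends on the encoding order and the liveness checks above; the register
// chosen here is only an example.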
2381 
2382 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2383  unsigned FrameReg, int &Offset,
2384  const ARMBaseInstrInfo &TII) {
2385  unsigned Opcode = MI.getOpcode();
2386  const MCInstrDesc &Desc = MI.getDesc();
2387  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2388  bool isSub = false;
2389 
2390  // Memory operands in inline assembly always use AddrMode2.
2391  if (Opcode == ARM::INLINEASM)
2392  AddrMode = ARMII::AddrMode2;
2393 
2394  if (Opcode == ARM::ADDri) {
2395  Offset += MI.getOperand(FrameRegIdx+1).getImm();
2396  if (Offset == 0) {
2397  // Turn it into a move.
2398  MI.setDesc(TII.get(ARM::MOVr));
2399  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2400  MI.RemoveOperand(FrameRegIdx+1);
2401  Offset = 0;
2402  return true;
2403  } else if (Offset < 0) {
2404  Offset = -Offset;
2405  isSub = true;
2406  MI.setDesc(TII.get(ARM::SUBri));
2407  }
2408 
2409  // Common case: small offset, fits into instruction.
2410  if (ARM_AM::getSOImmVal(Offset) != -1) {
2411  // Replace the FrameIndex with sp / fp
2412  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2413  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2414  Offset = 0;
2415  return true;
2416  }
2417 
2418  // Otherwise, pull as much of the immediate into this ADDri/SUBri
2419  // as possible.
2420  unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2421  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2422 
2423  // We will handle these bits from the offset; clear them.
2424  Offset &= ~ThisImmVal;
2425 
2426  // Get the properly encoded SOImmVal field.
2427  assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2428  "Bit extraction didn't work?");
2429  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2430  } else {
2431  unsigned ImmIdx = 0;
2432  int InstrOffs = 0;
2433  unsigned NumBits = 0;
2434  unsigned Scale = 1;
2435  switch (AddrMode) {
2436  case ARMII::AddrMode_i12:
2437  ImmIdx = FrameRegIdx + 1;
2438  InstrOffs = MI.getOperand(ImmIdx).getImm();
2439  NumBits = 12;
2440  break;
2441  case ARMII::AddrMode2:
2442  ImmIdx = FrameRegIdx+2;
2443  InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2444  if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2445  InstrOffs *= -1;
2446  NumBits = 12;
2447  break;
2448  case ARMII::AddrMode3:
2449  ImmIdx = FrameRegIdx+2;
2450  InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2451  if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2452  InstrOffs *= -1;
2453  NumBits = 8;
2454  break;
2455  case ARMII::AddrMode4:
2456  case ARMII::AddrMode6:
2457  // Can't fold any offset even if it's zero.
2458  return false;
2459  case ARMII::AddrMode5:
2460  ImmIdx = FrameRegIdx+1;
2461  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2462  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2463  InstrOffs *= -1;
2464  NumBits = 8;
2465  Scale = 4;
2466  break;
2467  case ARMII::AddrMode5FP16:
2468  ImmIdx = FrameRegIdx+1;
2469  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2470  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2471  InstrOffs *= -1;
2472  NumBits = 8;
2473  Scale = 2;
2474  break;
2475  default:
2476  llvm_unreachable("Unsupported addressing mode!");
2477  }
2478 
2479  Offset += InstrOffs * Scale;
2480  assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2481  if (Offset < 0) {
2482  Offset = -Offset;
2483  isSub = true;
2484  }
2485 
2486  // Attempt to fold the address computation if the opcode has offset bits.
2487  if (NumBits > 0) {
2488  // Common case: small offset, fits into instruction.
2489  MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2490  int ImmedOffset = Offset / Scale;
2491  unsigned Mask = (1 << NumBits) - 1;
2492  if ((unsigned)Offset <= Mask * Scale) {
2493  // Replace the FrameIndex with sp
2494  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2495  // FIXME: When addrmode2 goes away, this will simplify (like the
2496  // T2 version), as the LDR.i12 versions don't need the encoding
2497  // tricks for the offset value.
2498  if (isSub) {
2499  if (AddrMode == ARMII::AddrMode_i12)
2500  ImmedOffset = -ImmedOffset;
2501  else
2502  ImmedOffset |= 1 << NumBits;
2503  }
2504  ImmOp.ChangeToImmediate(ImmedOffset);
2505  Offset = 0;
2506  return true;
2507  }
2508 
2509  // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2510  ImmedOffset = ImmedOffset & Mask;
2511  if (isSub) {
2512  if (AddrMode == ARMII::AddrMode_i12)
2513  ImmedOffset = -ImmedOffset;
2514  else
2515  ImmedOffset |= 1 << NumBits;
2516  }
2517  ImmOp.ChangeToImmediate(ImmedOffset);
2518  Offset &= ~(Mask*Scale);
2519  }
2520  }
2521 
2522  Offset = (isSub) ? -Offset : Offset;
2523  return Offset == 0;
2524 }
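// Worked example (illustrative): for an LDRi12 (AddrMode_i12, NumBits = 12)
// whose frame index resolves to FrameReg = sp, a combined offset of 4092 fits
// the 12-bit field, so the operands are rewritten in place and the function
// returns true with Offset = 0. An offset of 4100 does not fit: the
// instruction keeps the low bits (immediate 4) and the function returns false
// with Offset = 4096 left for the caller to materialize separately.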
2525 
2526 /// analyzeCompare - For a comparison instruction, return the source registers
2527 /// in SrcReg and SrcReg2 if having two register operands, and the value it
2528 /// compares against in CmpValue. Return true if the comparison instruction
2529 /// can be analyzed.
2530 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
2531  unsigned &SrcReg2, int &CmpMask,
2532  int &CmpValue) const {
2533  switch (MI.getOpcode()) {
2534  default: break;
2535  case ARM::CMPri:
2536  case ARM::t2CMPri:
2537  case ARM::tCMPi8:
2538  SrcReg = MI.getOperand(0).getReg();
2539  SrcReg2 = 0;
2540  CmpMask = ~0;
2541  CmpValue = MI.getOperand(1).getImm();
2542  return true;
2543  case ARM::CMPrr:
2544  case ARM::t2CMPrr:
2545  case ARM::tCMPr:
2546  SrcReg = MI.getOperand(0).getReg();
2547  SrcReg2 = MI.getOperand(1).getReg();
2548  CmpMask = ~0;
2549  CmpValue = 0;
2550  return true;
2551  case ARM::TSTri:
2552  case ARM::t2TSTri:
2553  SrcReg = MI.getOperand(0).getReg();
2554  SrcReg2 = 0;
2555  CmpMask = MI.getOperand(1).getImm();
2556  CmpValue = 0;
2557  return true;
2558  }
2559 
2560  return false;
2561 }
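// Usage sketch (illustrative): for "CMPri %0, 42, 14, $noreg" this returns
// true with SrcReg = %0, SrcReg2 = 0, CmpMask = ~0 and CmpValue = 42; for
// "TSTri %0, 7, ..." it returns true with CmpMask = 7 and CmpValue = 0. The
// operand syntax shown is assumed MIR and is only meant as an example.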
2562 
2563 /// isSuitableForMask - Identify a suitable 'and' instruction that
2564 /// operates on the given source register and applies the same mask
2565 /// as a 'tst' instruction. Provide a limited look-through for copies.
2566 /// When successful, MI will hold the found instruction.
2567 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2568  int CmpMask, bool CommonUse) {
2569  switch (MI->getOpcode()) {
2570  case ARM::ANDri:
2571  case ARM::t2ANDri:
2572  if (CmpMask != MI->getOperand(2).getImm())
2573  return false;
2574  if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2575  return true;
2576  break;
2577  }
2578 
2579  return false;
2580 }
2581 
2582 /// getSwappedCondition - assume the flags are set by MI(a,b), return
2583 /// the condition code if we modify the instructions such that flags are
2584 /// set by MI(b,a).
2585 inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
2586  switch (CC) {
2587  default: return ARMCC::AL;
2588  case ARMCC::EQ: return ARMCC::EQ;
2589  case ARMCC::NE: return ARMCC::NE;
2590  case ARMCC::HS: return ARMCC::LS;
2591  case ARMCC::LO: return ARMCC::HI;
2592  case ARMCC::HI: return ARMCC::LO;
2593  case ARMCC::LS: return ARMCC::HS;
2594  case ARMCC::GE: return ARMCC::LE;
2595  case ARMCC::LT: return ARMCC::GT;
2596  case ARMCC::GT: return ARMCC::LT;
2597  case ARMCC::LE: return ARMCC::GE;
2598  }
2599 }
2600 
2601 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2602 /// the condition code if we modify the instructions such that flags are
2603 /// set by ADD(a,b,X).
2604 inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2605  switch (CC) {
2606  default: return ARMCC::AL;
2607  case ARMCC::HS: return ARMCC::LO;
2608  case ARMCC::LO: return ARMCC::HS;
2609  case ARMCC::VS: return ARMCC::VS;
2610  case ARMCC::VC: return ARMCC::VC;
2611  }
2612 }
2613 
2614 /// isRedundantFlagInstr - check whether the first instruction, whose only
2615 /// purpose is to update flags, can be made redundant.
2616 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2617 /// CMPri can be made redundant by SUBri if the operands are the same.
2618 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2619 /// This function can be extended later on.
2620 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2621  unsigned SrcReg, unsigned SrcReg2,
2622  int ImmValue, const MachineInstr *OI,
2623  bool &IsThumb1) {
2624  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2625  (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2626  ((OI->getOperand(1).getReg() == SrcReg &&
2627  OI->getOperand(2).getReg() == SrcReg2) ||
2628  (OI->getOperand(1).getReg() == SrcReg2 &&
2629  OI->getOperand(2).getReg() == SrcReg))) {
2630  IsThumb1 = false;
2631  return true;
2632  }
2633 
2634  if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2635  ((OI->getOperand(2).getReg() == SrcReg &&
2636  OI->getOperand(3).getReg() == SrcReg2) ||
2637  (OI->getOperand(2).getReg() == SrcReg2 &&
2638  OI->getOperand(3).getReg() == SrcReg))) {
2639  IsThumb1 = true;
2640  return true;
2641  }
2642 
2643  if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2644  (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2645  OI->getOperand(1).getReg() == SrcReg &&
2646  OI->getOperand(2).getImm() == ImmValue) {
2647  IsThumb1 = false;
2648  return true;
2649  }
2650 
2651  if (CmpI->getOpcode() == ARM::tCMPi8 &&
2652  (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2653  OI->getOperand(2).getReg() == SrcReg &&
2654  OI->getOperand(3).getImm() == ImmValue) {
2655  IsThumb1 = true;
2656  return true;
2657  }
2658 
2659  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2660  (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2661  OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2662  OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2663  OI->getOperand(0).getReg() == SrcReg &&
2664  OI->getOperand(1).getReg() == SrcReg2) {
2665  IsThumb1 = false;
2666  return true;
2667  }
2668 
2669  if (CmpI->getOpcode() == ARM::tCMPr &&
2670  (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2671  OI->getOpcode() == ARM::tADDrr) &&
2672  OI->getOperand(0).getReg() == SrcReg &&
2673  OI->getOperand(2).getReg() == SrcReg2) {
2674  IsThumb1 = true;
2675  return true;
2676  }
2677 
2678  return false;
2679 }
2680 
2681 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2682  switch (MI->getOpcode()) {
2683  default: return false;
2684  case ARM::tLSLri:
2685  case ARM::tLSRri:
2686  case ARM::tLSLrr:
2687  case ARM::tLSRrr:
2688  case ARM::tSUBrr:
2689  case ARM::tADDrr:
2690  case ARM::tADDi3:
2691  case ARM::tADDi8:
2692  case ARM::tSUBi3:
2693  case ARM::tSUBi8:
2694  case ARM::tMUL:
2695  case ARM::tADC:
2696  case ARM::tSBC:
2697  case ARM::tRSB:
2698  case ARM::tAND:
2699  case ARM::tORR:
2700  case ARM::tEOR:
2701  case ARM::tBIC:
2702  case ARM::tMVN:
2703  case ARM::tASRri:
2704  case ARM::tASRrr:
2705  case ARM::tROR:
2706  IsThumb1 = true;
2707  LLVM_FALLTHROUGH;
2708  case ARM::RSBrr:
2709  case ARM::RSBri:
2710  case ARM::RSCrr:
2711  case ARM::RSCri:
2712  case ARM::ADDrr:
2713  case ARM::ADDri:
2714  case ARM::ADCrr:
2715  case ARM::ADCri:
2716  case ARM::SUBrr:
2717  case ARM::SUBri:
2718  case ARM::SBCrr:
2719  case ARM::SBCri:
2720  case ARM::t2RSBri:
2721  case ARM::t2ADDrr:
2722  case ARM::t2ADDri:
2723  case ARM::t2ADCrr:
2724  case ARM::t2ADCri:
2725  case ARM::t2SUBrr:
2726  case ARM::t2SUBri:
2727  case ARM::t2SBCrr:
2728  case ARM::t2SBCri:
2729  case ARM::ANDrr:
2730  case ARM::ANDri:
2731  case ARM::t2ANDrr:
2732  case ARM::t2ANDri:
2733  case ARM::ORRrr:
2734  case ARM::ORRri:
2735  case ARM::t2ORRrr:
2736  case ARM::t2ORRri:
2737  case ARM::EORrr:
2738  case ARM::EORri:
2739  case ARM::t2EORrr:
2740  case ARM::t2EORri:
2741  case ARM::t2LSRri:
2742  case ARM::t2LSRrr:
2743  case ARM::t2LSLri:
2744  case ARM::t2LSLrr:
2745  return true;
2746  }
2747 }
2748 
2749 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2750 /// comparison into one that sets the zero bit in the flags register;
2751 /// Remove a redundant Compare instruction if an earlier instruction can set the
2752 /// flags in the same way as Compare.
2753 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2754 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2755 /// condition code of instructions which use the flags.
2756 bool ARMBaseInstrInfo::optimizeCompareInstr(
2757  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
2758  int CmpValue, const MachineRegisterInfo *MRI) const {
2759  // Get the unique definition of SrcReg.
2760  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2761  if (!MI) return false;
2762 
2763  // Masked compares sometimes use the same register as the corresponding 'and'.
2764  if (CmpMask != ~0) {
2765  if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2766  MI = nullptr;
2767  for (MachineRegisterInfo::use_instr_iterator
2768  UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2769  UI != UE; ++UI) {
2770  if (UI->getParent() != CmpInstr.getParent())
2771  continue;
2772  MachineInstr *PotentialAND = &*UI;
2773  if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2774  isPredicated(*PotentialAND))
2775  continue;
2776  MI = PotentialAND;
2777  break;
2778  }
2779  if (!MI) return false;
2780  }
2781  }
2782 
2783  // Get ready to iterate backward from CmpInstr.
2784  MachineBasicBlock::iterator I = CmpInstr, E = MI,
2785  B = CmpInstr.getParent()->begin();
2786 
2787  // Early exit if CmpInstr is at the beginning of the BB.
2788  if (I == B) return false;
2789 
2790  // There are two possible candidates which can be changed to set CPSR:
2791  // One is MI, the other is a SUB or ADD instruction.
2792  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2793  // ADDr[ri](r1, r2, X).
2794  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2795  MachineInstr *SubAdd = nullptr;
2796  if (SrcReg2 != 0)
2797  // MI is not a candidate for CMPrr.
2798  MI = nullptr;
2799  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2800  // Conservatively refuse to convert an instruction which isn't in the same
2801  // BB as the comparison.
2802  // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2803  // Thus we cannot return here.
2804  if (CmpInstr.getOpcode() == ARM::CMPri ||
2805  CmpInstr.getOpcode() == ARM::t2CMPri ||
2806  CmpInstr.getOpcode() == ARM::tCMPi8)
2807  MI = nullptr;
2808  else
2809  return false;
2810  }
2811 
2812  bool IsThumb1 = false;
2813  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2814  return false;
2815 
2816  // We also want to do this peephole for cases like this: if (a*b == 0),
2817  // and optimise away the CMP instruction from the generated code sequence:
2818  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2819  // resulting from the select instruction, but these MOVS instructions for
2820  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2821  // However, if we only have MOVS instructions in between the CMP and the
2822  // other instruction (the MULS in this example), then the CPSR is dead so we
2823  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2824  // reordering and then continue the analysis hoping we can eliminate the
2825  // CMP. This peephole works on the vregs, so is still in SSA form. As a
2826  // consequence, the movs won't redefine/kill the MUL operands which would
2827  // make this reordering illegal.
2828  const TargetRegisterInfo *TRI = &getRegisterInfo();
2829  if (MI && IsThumb1) {
2830  --I;
2831  if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
2832  bool CanReorder = true;
2833  for (; I != E; --I) {
2834  if (I->getOpcode() != ARM::tMOVi8) {
2835  CanReorder = false;
2836  break;
2837  }
2838  }
2839  if (CanReorder) {
2840  MI = MI->removeFromParent();
2841  E = CmpInstr;
2842  CmpInstr.getParent()->insert(E, MI);
2843  }
2844  }
2845  I = CmpInstr;
2846  E = MI;
2847  }
2848 
2849  // Check that CPSR isn't set between the comparison instruction and the one we
2850  // want to change. At the same time, search for SubAdd.
2851  bool SubAddIsThumb1 = false;
2852  do {
2853  const MachineInstr &Instr = *--I;
2854 
2855  // Check whether CmpInstr can be made redundant by the current instruction.
2856  if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
2857  SubAddIsThumb1)) {
2858  SubAdd = &*I;
2859  break;
2860  }
2861 
2862  // Allow E (which was initially MI) to be SubAdd but do not search before E.
2863  if (I == E)
2864  break;
2865 
2866  if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2867  Instr.readsRegister(ARM::CPSR, TRI))
2868  // This instruction modifies or uses CPSR after the one we want to
2869  // change. We can't do this transformation.
2870  return false;
2871 
2872  } while (I != B);
2873 
2874  // Return false if no candidates exist.
2875  if (!MI && !SubAdd)
2876  return false;
2877 
2878  // If we found a SubAdd, use it as it will be closer to the CMP
2879  if (SubAdd) {
2880  MI = SubAdd;
2881  IsThumb1 = SubAddIsThumb1;
2882  }
2883 
2884  // We can't use a predicated instruction - it doesn't always write the flags.
2885  if (isPredicated(*MI))
2886  return false;
2887 
2888  // Scan forward for the use of CPSR
2889  // When checking against MI: if it's a condition code that requires
2890  // checking of the V bit or C bit, then this is not safe to do.
2891  // It is safe to remove CmpInstr if CPSR is redefined or killed.
2892  // If we are done with the basic block, we need to check whether CPSR is
2893  // live-out.
2894  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
2895  OperandsToUpdate;
2896  bool isSafe = false;
2897  I = CmpInstr;
2898  E = CmpInstr.getParent()->end();
2899  while (!isSafe && ++I != E) {
2900  const MachineInstr &Instr = *I;
2901  for (unsigned IO = 0, EO = Instr.getNumOperands();
2902  !isSafe && IO != EO; ++IO) {
2903  const MachineOperand &MO = Instr.getOperand(IO);
2904  if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2905  isSafe = true;
2906  break;
2907  }
2908  if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2909  continue;
2910  if (MO.isDef()) {
2911  isSafe = true;
2912  break;
2913  }
2914  // Condition code is after the operand before CPSR except for VSELs.
2915  ARMCC::CondCodes CC;
2916  bool IsInstrVSel = true;
2917  switch (Instr.getOpcode()) {
2918  default:
2919  IsInstrVSel = false;
2920  CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
2921  break;
2922  case ARM::VSELEQD:
2923  case ARM::VSELEQS:
2924  CC = ARMCC::EQ;
2925  break;
2926  case ARM::VSELGTD:
2927  case ARM::VSELGTS:
2928  CC = ARMCC::GT;
2929  break;
2930  case ARM::VSELGED:
2931  case ARM::VSELGES:
2932  CC = ARMCC::GE;
2933  break;
2934  case ARM::VSELVSS:
2935  case ARM::VSELVSD:
2936  CC = ARMCC::VS;
2937  break;
2938  }
2939 
2940  if (SubAdd) {
2941  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2942  // on CMP needs to be updated to be based on SUB.
2943  // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
2944  // needs to be modified.
2945  // Push the condition code operands to OperandsToUpdate.
2946  // If it is safe to remove CmpInstr, the condition code of these
2947  // operands will be modified.
2948  unsigned Opc = SubAdd->getOpcode();
2949  bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
2950  Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
2951  Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
2952  Opc == ARM::tSUBi8;
2953  unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
2954  if (!IsSub ||
2955  (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
2956  SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
2957  // VSel doesn't support condition code update.
2958  if (IsInstrVSel)
2959  return false;
2960  // Ensure we can swap the condition.
2961  ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
2962  if (NewCC == ARMCC::AL)
2963  return false;
2964  OperandsToUpdate.push_back(
2965  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
2966  }
2967  } else {
2968  // No SubAdd, so this is x = <op> y, z; cmp x, 0.
2969  switch (CC) {
2970  case ARMCC::EQ: // Z
2971  case ARMCC::NE: // Z
2972  case ARMCC::MI: // N
2973  case ARMCC::PL: // N
2974  case ARMCC::AL: // none
2975  // CPSR can be used multiple times, we should continue.
2976  break;
2977  case ARMCC::HS: // C
2978  case ARMCC::LO: // C
2979  case ARMCC::VS: // V
2980  case ARMCC::VC: // V
2981  case ARMCC::HI: // C Z
2982  case ARMCC::LS: // C Z
2983  case ARMCC::GE: // N V
2984  case ARMCC::LT: // N V
2985  case ARMCC::GT: // Z N V
2986  case ARMCC::LE: // Z N V
2987  // The instruction uses the V bit or C bit which is not safe.
2988  return false;
2989  }
2990  }
2991  }
2992  }
2993 
2994  // If CPSR is not killed nor re-defined, we should check whether it is
2995  // live-out. If it is live-out, do not optimize.
2996  if (!isSafe) {
2997  MachineBasicBlock *MBB = CmpInstr.getParent();
2998  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2999  SE = MBB->succ_end(); SI != SE; ++SI)
3000  if ((*SI)->isLiveIn(ARM::CPSR))
3001  return false;
3002  }
3003 
3004  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3005  // set CPSR so this is represented as an explicit output)
3006  if (!IsThumb1) {
3007  MI->getOperand(5).setReg(ARM::CPSR);
3008  MI->getOperand(5).setIsDef(true);
3009  }
3010  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3011  CmpInstr.eraseFromParent();
3012 
3013  // Modify the condition code of operands in OperandsToUpdate.
3014  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3015  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3016  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3017  OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3018 
3019  MI->clearRegisterDeads(ARM::CPSR);
3020 
3021  return true;
3022 }
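// Illustrative before/after (assumed MIR): given
//   %2 = SUBrr %0, %1, 14, $noreg, $noreg
//   CMPrr %0, %1, 14, $noreg, implicit-def $cpsr
// the SUB is found as the SubAdd candidate, its optional cc_out operand is
// switched to a live CPSR def, and the CMPrr is erased. Users of the flags
// only need their condition codes rewritten when the compare's operands were
// swapped relative to the SUB (or when matching against an ADD).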
3023 
3024 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3025  // Do not sink MI if it might be used to optimize a redundant compare.
3026  // We heuristically only look at the instruction immediately following MI to
3027  // avoid potentially searching the entire basic block.
3028  if (isPredicated(MI))
3029  return true;
3030  MachineBasicBlock::const_iterator Next = &MI;
3031  ++Next;
3032  unsigned SrcReg, SrcReg2;
3033  int CmpMask, CmpValue;
3034  bool IsThumb1;
3035  if (Next != MI.getParent()->end() &&
3036  analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3037  isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3038  return false;
3039  return true;
3040 }
3041 
3042 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3043  unsigned Reg,
3044  MachineRegisterInfo *MRI) const {
3045  // Fold large immediates into add, sub, or, xor.
3046  unsigned DefOpc = DefMI.getOpcode();
3047  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
3048  return false;
3049  if (!DefMI.getOperand(1).isImm())
3050  // Could be t2MOVi32imm @xx
3051  return false;
3052 
3053  if (!MRI->hasOneNonDBGUse(Reg))
3054  return false;
3055 
3056  const MCInstrDesc &DefMCID = DefMI.getDesc();
3057  if (DefMCID.hasOptionalDef()) {
3058  unsigned NumOps = DefMCID.getNumOperands();
3059  const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3060  if (MO.getReg() == ARM::CPSR && !MO.isDead())
3061  // If DefMI defines CPSR and it is not dead, it's obviously not safe
3062  // to delete DefMI.
3063  return false;
3064  }
3065 
3066  const MCInstrDesc &UseMCID = UseMI.getDesc();
3067  if (UseMCID.hasOptionalDef()) {
3068  unsigned NumOps = UseMCID.getNumOperands();
3069  if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3070  // If the instruction sets the flag, do not attempt this optimization
3071  // since it may change the semantics of the code.
3072  return false;
3073  }
3074 
3075  unsigned UseOpc = UseMI.getOpcode();
3076  unsigned NewUseOpc = 0;
3077  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3078  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3079  bool Commute = false;
3080  switch (UseOpc) {
3081  default: return false;
3082  case ARM::SUBrr:
3083  case ARM::ADDrr:
3084  case ARM::ORRrr:
3085  case ARM::EORrr:
3086  case ARM::t2SUBrr:
3087  case ARM::t2ADDrr:
3088  case ARM::t2ORRrr:
3089  case ARM::t2EORrr: {
3090  Commute = UseMI.getOperand(2).getReg() != Reg;
3091  switch (UseOpc) {
3092  default: break;
3093  case ARM::ADDrr:
3094  case ARM::SUBrr:
3095  if (UseOpc == ARM::SUBrr && Commute)
3096  return false;
3097 
3098  // ADD/SUB are special because they're essentially the same operation, so
3099  // we can handle a larger range of immediates.
3100  if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3101  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3102  else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3103  ImmVal = -ImmVal;
3104  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3105  } else
3106  return false;
3107  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3108  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3109  break;
3110  case ARM::ORRrr:
3111  case ARM::EORrr:
3112  if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3113  return false;
3114  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3115  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3116  switch (UseOpc) {
3117  default: break;
3118  case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3119  case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3120  }
3121  break;
3122  case ARM::t2ADDrr:
3123  case ARM::t2SUBrr:
3124  if (UseOpc == ARM::t2SUBrr && Commute)
3125  return false;
3126 
3127  // ADD/SUB are special because they're essentially the same operation, so
3128  // we can handle a larger range of immediates.
3129  if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3130  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
3131  else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3132  ImmVal = -ImmVal;
3133  NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
3134  } else
3135  return false;
3136  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3137  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3138  break;
3139  case ARM::t2ORRrr:
3140  case ARM::t2EORrr:
3141  if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3142  return false;
3143  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3144  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3145  switch (UseOpc) {
3146  default: break;
3147  case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3148  case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3149  }
3150  break;
3151  }
3152  }
3153  }
3154 
3155  unsigned OpIdx = Commute ? 2 : 1;
3156  unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
3157  bool isKill = UseMI.getOperand(OpIdx).isKill();
3158  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
3159  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3160  NewReg)
3161  .addReg(Reg1, getKillRegState(isKill))
3162  .addImm(SOImmValV1)
3163  .add(predOps(ARMCC::AL))
3164  .add(condCodeOp());
3165  UseMI.setDesc(get(NewUseOpc));
3166  UseMI.getOperand(1).setReg(NewReg);
3167  UseMI.getOperand(1).setIsKill();
3168  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3169  DefMI.eraseFromParent();
3170  return true;
3171 }
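// Illustrative sketch (assumed virtual register numbering): folding
//   %1 = MOVi32imm 510
//   %2 = ADDrr %0, %1, 14, $noreg, $noreg
// splits the constant into two modified immediates (510 = 254 + 256) and
// rewrites the use as
//   %3 = ADDri %0, 254, 14, $noreg, $noreg
//   %2 = ADDri %3, 256, 14, $noreg, $noreg
// after which the MOVi32imm is erased. The particular two-part split comes
// from getSOImmTwoPartFirst/Second and is shown here only as an example.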
3172 
3173 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3174  const MachineInstr &MI) {
3175  switch (MI.getOpcode()) {
3176  default: {
3177  const MCInstrDesc &Desc = MI.getDesc();
3178  int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3179  assert(UOps >= 0 && "bad # UOps");
3180  return UOps;
3181  }
3182 
3183  case ARM::LDRrs:
3184  case ARM::LDRBrs:
3185  case ARM::STRrs:
3186  case ARM::STRBrs: {
3187  unsigned ShOpVal = MI.getOperand(3).getImm();
3188  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3189  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3190  if (!isSub &&
3191  (ShImm == 0 ||
3192  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3193  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3194  return 1;
3195  return 2;
3196  }
3197 
3198  case ARM::LDRH:
3199  case ARM::STRH: {
3200  if (!MI.getOperand(2).getReg())
3201  return 1;
3202 
3203  unsigned ShOpVal = MI.getOperand(3).getImm();
3204  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3205  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3206  if (!isSub &&
3207  (ShImm == 0 ||
3208  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3209  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3210  return 1;
3211  return 2;
3212  }
3213 
3214  case ARM::LDRSB:
3215  case ARM::LDRSH:
3216  return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3217 
3218  case ARM::LDRSB_POST:
3219  case ARM::LDRSH_POST: {
3220  unsigned Rt = MI.getOperand(0).getReg();
3221  unsigned Rm = MI.getOperand(3).getReg();
3222  return (Rt == Rm) ? 4 : 3;
3223  }
3224 
3225  case ARM::LDR_PRE_REG:
3226  case ARM::LDRB_PRE_REG: {
3227  unsigned Rt = MI.getOperand(0).getReg();
3228  unsigned Rm = MI.getOperand(3).getReg();
3229  if (Rt == Rm)
3230  return 3;
3231  unsigned ShOpVal = MI.getOperand(4).getImm();
3232  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3233  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3234  if (!isSub &&
3235  (ShImm == 0 ||
3236  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3237  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3238  return 2;
3239  return 3;
3240  }
3241 
3242  case ARM::STR_PRE_REG:
3243  case ARM::STRB_PRE_REG: {
3244  unsigned ShOpVal = MI.getOperand(4).getImm();
3245  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3246  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3247  if (!isSub &&
3248  (ShImm == 0 ||
3249  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3250  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3251  return 2;
3252  return 3;
3253  }
3254 
3255  case ARM::LDRH_PRE:
3256  case ARM::STRH_PRE: {
3257  unsigned Rt = MI.getOperand(0).getReg();
3258  unsigned Rm = MI.getOperand(3).getReg();
3259  if (!Rm)
3260  return 2;
3261  if (Rt == Rm)
3262  return 3;
3263  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3264  }
3265 
3266  case ARM::LDR_POST_REG:
3267  case ARM::LDRB_POST_REG:
3268  case ARM::LDRH_POST: {
3269  unsigned Rt = MI.getOperand(0).getReg();
3270  unsigned Rm = MI.getOperand(3).getReg();
3271  return (Rt == Rm) ? 3 : 2;
3272  }
3273 
3274  case ARM::LDR_PRE_IMM:
3275  case ARM::LDRB_PRE_IMM:
3276  case ARM::LDR_POST_IMM:
3277  case ARM::LDRB_POST_IMM:
3278  case ARM::STRB_POST_IMM:
3279  case ARM::STRB_POST_REG:
3280  case ARM::STRB_PRE_IMM:
3281  case ARM::STRH_POST:
3282  case ARM::STR_POST_IMM:
3283  case ARM::STR_POST_REG:
3284  case ARM::STR_PRE_IMM:
3285  return 2;
3286 
3287  case ARM::LDRSB_PRE:
3288  case ARM::LDRSH_PRE: {
3289  unsigned Rm = MI.getOperand(3).getReg();
3290  if (Rm == 0)
3291  return 3;
3292  unsigned Rt = MI.getOperand(0).getReg();
3293  if (Rt == Rm)
3294  return 4;
3295  unsigned ShOpVal = MI.getOperand(4).getImm();
3296  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3297  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3298  if (!isSub &&
3299  (ShImm == 0 ||
3300  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3301  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3302  return 3;
3303  return 4;
3304  }
3305 
3306  case ARM::LDRD: {
3307  unsigned Rt = MI.getOperand(0).getReg();
3308  unsigned Rn = MI.getOperand(2).getReg();
3309  unsigned Rm = MI.getOperand(3).getReg();
3310  if (Rm)
3311  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3312  : 3;
3313  return (Rt == Rn) ? 3 : 2;
3314  }
3315 
3316  case ARM::STRD: {
3317  unsigned Rm = MI.getOperand(3).getReg();
3318  if (Rm)
3319  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3320  : 3;
3321  return 2;
3322  }
3323 
3324  case ARM::LDRD_POST:
3325  case ARM::t2LDRD_POST:
3326  return 3;
3327 
3328  case ARM::STRD_POST:
3329  case ARM::t2STRD_POST:
3330  return 4;
3331 
3332  case ARM::LDRD_PRE: {
3333  unsigned Rt = MI.getOperand(0).getReg();
3334  unsigned Rn = MI.getOperand(3).getReg();
3335  unsigned Rm = MI.getOperand(4).getReg();
3336  if (Rm)
3337  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3338  : 4;
3339  return (Rt == Rn) ? 4 : 3;
3340  }
3341 
3342  case ARM::t2LDRD_PRE: {
3343  unsigned Rt = MI.getOperand(0).getReg();
3344  unsigned Rn = MI.getOperand(3).getReg();
3345  return (Rt == Rn) ? 4 : 3;
3346  }
3347 
3348  case ARM::STRD_PRE: {
3349  unsigned Rm = MI.getOperand(4).getReg();
3350  if (Rm)
3351  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3352  : 4;
3353  return 3;
3354  }
3355 
3356  case ARM::t2STRD_PRE:
3357  return 3;
3358 
3359  case ARM::t2LDR_POST:
3360  case ARM::t2LDRB_POST:
3361  case ARM::t2LDRB_PRE:
3362  case ARM::t2LDRSBi12:
3363  case ARM::t2LDRSBi8:
3364  case ARM::t2LDRSBpci:
3365  case ARM::t2LDRSBs:
3366  case ARM::t2LDRH_POST:
3367  case ARM::t2LDRH_PRE:
3368  case ARM::t2LDRSBT:
3369  case ARM::t2LDRSB_POST:
3370  case ARM::t2LDRSB_PRE:
3371  case ARM::t2LDRSH_POST:
3372  case ARM::t2LDRSH_PRE:
3373  case ARM::t2LDRSHi12:
3374  case ARM::t2LDRSHi8:
3375  case ARM::t2LDRSHpci:
3376  case ARM::t2LDRSHs:
3377  return 2;
3378 
3379  case ARM::t2LDRDi8: {
3380  unsigned Rt = MI.getOperand(0).getReg();
3381  unsigned Rn = MI.getOperand(2).getReg();
3382  return (Rt == Rn) ? 3 : 2;
3383  }
3384 
3385  case ARM::t2STRB_POST:
3386  case ARM::t2STRB_PRE:
3387  case ARM::t2STRBs:
3388  case ARM::t2STRDi8:
3389  case ARM::t2STRH_POST:
3390  case ARM::t2STRH_PRE:
3391  case ARM::t2STRHs:
3392  case ARM::t2STR_POST:
3393  case ARM::t2STR_PRE:
3394  case ARM::t2STRs:
3395  return 2;
3396  }
3397 }
3398 
3399 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3400 // can't be easily determined return 0 (missing MachineMemOperand).
3401 //
3402 // FIXME: The current MachineInstr design does not support relying on machine
3403 // mem operands to determine the width of a memory access. Instead, we expect
3404 // the target to provide this information based on the instruction opcode and
3405 // operands. However, using MachineMemOperand is the best solution now for
3406 // two reasons:
3407 //
3408 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3409 // operands. This is much more dangerous than using the MachineMemOperand
3410 // sizes because CodeGen passes can insert/remove optional machine operands. In
3411 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3412 // postRA passes as well.
3413 //
3414 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3415 // machine model that calls this should handle the unknown (zero size) case.
3416 //
3417 // Long term, we should require a target hook that verifies MachineMemOperand
3418 // sizes during MC lowering. That target hook should be local to MC lowering
3419 // because we can't ensure that it is aware of other MI forms. Doing this will
3420 // ensure that MachineMemOperands are correctly propagated through all passes.
3421 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3422  unsigned Size = 0;
3423  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3424  E = MI.memoperands_end();
3425  I != E; ++I) {
3426  Size += (*I)->getSize();
3427  }
3428  return Size / 4;
3429 }
3430 
3431 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3432  unsigned NumRegs) {
3433  unsigned UOps = 1 + NumRegs; // 1 for address computation.
3434  switch (Opc) {
3435  default:
3436  break;
3437  case ARM::VLDMDIA_UPD:
3438  case ARM::VLDMDDB_UPD:
3439  case ARM::VLDMSIA_UPD:
3440  case ARM::VLDMSDB_UPD:
3441  case ARM::VSTMDIA_UPD:
3442  case ARM::VSTMDDB_UPD:
3443  case ARM::VSTMSIA_UPD:
3444  case ARM::VSTMSDB_UPD:
3445  case ARM::LDMIA_UPD:
3446  case ARM::LDMDA_UPD:
3447  case ARM::LDMDB_UPD:
3448  case ARM::LDMIB_UPD:
3449  case ARM::STMIA_UPD:
3450  case ARM::STMDA_UPD:
3451  case ARM::STMDB_UPD:
3452  case ARM::STMIB_UPD:
3453  case ARM::tLDMIA_UPD:
3454  case ARM::tSTMIA_UPD:
3455  case ARM::t2LDMIA_UPD:
3456  case ARM::t2LDMDB_UPD:
3457  case ARM::t2STMIA_UPD:
3458  case ARM::t2STMDB_UPD:
3459  ++UOps; // One for base register writeback.
3460  break;
3461  case ARM::LDMIA_RET:
3462  case ARM::tPOP_RET:
3463  case ARM::t2LDMIA_RET:
3464  UOps += 2; // One for base reg wb, one for write to pc.
3465  break;
3466  }
3467  return UOps;
3468 }
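// For example, with NumRegs = 2 an LDMIA_UPD costs 1 (address) + 2 + 1
// (writeback) = 4 uops, and with NumRegs = 3 a tPOP_RET costs
// 1 + 3 + 2 (writeback and PC write) = 6 uops.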
3469 
3470 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3471  const MachineInstr &MI) const {
3472  if (!ItinData || ItinData->isEmpty())
3473  return 1;
3474 
3475  const MCInstrDesc &Desc = MI.getDesc();
3476  unsigned Class = Desc.getSchedClass();
3477  int ItinUOps = ItinData->getNumMicroOps(Class);
3478  if (ItinUOps >= 0) {
3479  if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3480  return getNumMicroOpsSwiftLdSt(ItinData, MI);
3481 
3482  return ItinUOps;
3483  }
3484 
3485  unsigned Opc = MI.getOpcode();
3486  switch (Opc) {
3487  default:
3488  llvm_unreachable("Unexpected multi-uops instruction!");
3489  case ARM::VLDMQIA:
3490  case ARM::VSTMQIA:
3491  return 2;
3492 
3493  // The number of uOps for load / store multiple is determined by the number
3494  // of registers.
3495  //
3496  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3497  // same cycle. The scheduling for the first load / store must be done
3498  // separately by assuming the address is not 64-bit aligned.
3499  //
3500  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3501  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3502  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
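 // For example, a VLDM of 5 D-registers on Cortex-A9 costs
 // (5 / 2) + (5 % 2) + 1 = 2 + 1 + 1 = 4 uops.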
3503  case ARM::VLDMDIA:
3504  case ARM::VLDMDIA_UPD:
3505  case ARM::VLDMDDB_UPD:
3506  case ARM::VLDMSIA:
3507  case ARM::VLDMSIA_UPD:
3508  case ARM::VLDMSDB_UPD:
3509  case ARM::VSTMDIA:
3510  case ARM::VSTMDIA_UPD:
3511  case ARM::VSTMDDB_UPD:
3512  case ARM::VSTMSIA:
3513  case ARM::VSTMSIA_UPD:
3514  case ARM::VSTMSDB_UPD: {
3515  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3516  return (NumRegs / 2) + (NumRegs % 2) + 1;
3517  }
3518 
3519  case ARM::LDMIA_RET:
3520  case ARM::LDMIA:
3521  case ARM::LDMDA:
3522  case ARM::LDMDB:
3523  case ARM::LDMIB:
3524  case ARM::LDMIA_UPD:
3525  case ARM::LDMDA_UPD:
3526  case ARM::LDMDB_UPD:
3527  case ARM::LDMIB_UPD:
3528  case ARM::STMIA:
3529  case ARM::STMDA:
3530  case ARM::STMDB:
3531  case ARM::STMIB:
3532  case ARM::STMIA_UPD:
3533  case ARM::STMDA_UPD:
3534  case ARM::STMDB_UPD:
3535  case ARM::STMIB_UPD:
3536  case ARM::tLDMIA:
3537  case ARM::tLDMIA_UPD:
3538  case ARM::tSTMIA_UPD:
3539  case ARM::tPOP_RET:
3540  case ARM::tPOP:
3541  case ARM::tPUSH:
3542  case ARM::t2LDMIA_RET:
3543  case ARM::t2LDMIA:
3544  case ARM::t2LDMDB:
3545  case ARM::t2LDMIA_UPD:
3546  case ARM::t2LDMDB_UPD:
3547  case ARM::t2STMIA:
3548  case ARM::t2STMDB:
3549  case ARM::t2STMIA_UPD:
3550  case ARM::t2STMDB_UPD: {
3551  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3552  switch (Subtarget.getLdStMultipleTiming()) {
3553  case ARMSubtarget::SingleIssuePlusExtras:
3554  return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3555  case ARMSubtarget::SingleIssue:
3556  // Assume the worst.
3557  return NumRegs;
3558  case ARMSubtarget::DoubleIssue: {
3559  if (NumRegs < 4)
3560  return 2;
3561  // 4 registers would be issued: 2, 2.
3562  // 5 registers would be issued: 2, 2, 1.
3563  unsigned UOps = (NumRegs / 2);
3564  if (NumRegs % 2)
3565  ++UOps;
3566  return UOps;
3567  }
3568  case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3569  unsigned UOps = (NumRegs / 2);
3570  // If there is an odd number of registers or the access is not 64-bit aligned,
3571  // then it takes an extra AGU (Address Generation Unit) cycle.
3572  if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3573  (*MI.memoperands_begin())->getAlignment() < 8)
3574  ++UOps;
3575  return UOps;
3576  }
3577  }
3578  }
3579  }
3580  llvm_unreachable("Didn't find the number of microops");
3581 }
3582 
3583 int
3584 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3585  const MCInstrDesc &DefMCID,
3586  unsigned DefClass,
3587  unsigned DefIdx, unsigned DefAlign) const {
3588  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3589  if (RegNo <= 0)
3590  // Def is the address writeback.
3591  return ItinData->getOperandCycle(DefClass, DefIdx);
3592 
3593  int DefCycle;
3594  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3595  // (regno / 2) + (regno % 2) + 1
3596  DefCycle = RegNo / 2 + 1;
3597  if (RegNo % 2)
3598  ++DefCycle;
3599  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3600  DefCycle = RegNo;
3601  bool isSLoad = false;
3602 
3603  switch (DefMCID.getOpcode()) {
3604  default: break;
3605  case ARM::VLDMSIA:
3606  case ARM::VLDMSIA_UPD:
3607  case ARM::VLDMSDB_UPD:
3608  isSLoad = true;
3609  break;
3610  }
3611 
3612  // If there is an odd number of 'S' registers or the access is not 64-bit aligned,
3613  // then it takes an extra cycle.
3614  if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3615  ++DefCycle;
3616  } else {
3617  // Assume the worst.
3618  DefCycle = RegNo + 2;
3619  }
3620 
3621  return DefCycle;
3622 }
3623 
3624 bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
3625  unsigned BaseReg = MI.getOperand(0).getReg();
3626  for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
3627  const auto &Op = MI.getOperand(i);
3628  if (Op.isReg() && Op.getReg() == BaseReg)
3629  return true;
3630  }
3631  return false;
3632 }
3633 unsigned
3634 ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
3635  // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops
3636  // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops)
3637  return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
3638 }
3639 
3640 int
3641 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3642  const MCInstrDesc &DefMCID,
3643  unsigned DefClass,
3644  unsigned DefIdx, unsigned DefAlign) const {
3645  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3646  if (RegNo <= 0)
3647  // Def is the address writeback.
3648  return ItinData->getOperandCycle(DefClass, DefIdx);
3649 
3650  int DefCycle;
3651  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3652  // 4 registers would be issued: 1, 2, 1.
3653  // 5 registers would be issued: 1, 2, 2.
3654  DefCycle = RegNo / 2;
3655  if (DefCycle < 1)
3656  DefCycle = 1;
3657  // Result latency is issue cycle + 2: E2.
3658  DefCycle += 2;
3659  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3660  DefCycle = (RegNo / 2);
3661  // If there is an odd number of registers or the access is not 64-bit aligned,
3662  // then it takes an extra AGU (Address Generation Unit) cycle.
3663  if ((RegNo % 2) || DefAlign < 8)
3664  ++DefCycle;
3665  // Result latency is AGU cycles + 2.
3666  DefCycle += 2;
3667  } else {
3668  // Assume the worst.
3669  DefCycle = RegNo + 2;
3670  }
3671 
3672  return DefCycle;
3673 }
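// For example, the 5th register of an LDM (RegNo = 5): on Cortex-A8 the issue
// cycle is max(5 / 2, 1) = 2, so DefCycle = 2 + 2 = 4; on a Cortex-A9-like
// core the odd register count adds a cycle, giving (5 / 2) + 1 + 2 = 5.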
3674 
3675 int
3676 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3677  const MCInstrDesc &UseMCID,
3678  unsigned UseClass,
3679  unsigned UseIdx, unsigned UseAlign) const {
3680  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3681  if (RegNo <= 0)
3682  return ItinData->getOperandCycle(UseClass, UseIdx);
3683 
3684  int UseCycle;
3685  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3686  // (regno / 2) + (regno % 2) + 1
3687  UseCycle = RegNo / 2 + 1;
3688  if (RegNo % 2)
3689  ++UseCycle;
3690  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3691  UseCycle = RegNo;
3692  bool isSStore = false;
3693 
3694  switch (UseMCID.getOpcode()) {
3695  default: break;
3696  case ARM::VSTMSIA:
3697  case ARM::VSTMSIA_UPD:
3698  case ARM::VSTMSDB_UPD:
3699  isSStore = true;
3700  break;
3701  }
3702 
3703  // If there is an odd number of 'S' registers or the access is not 64-bit aligned,
3704  // then it takes an extra cycle.
3705  if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3706  ++UseCycle;
3707  } else {
3708  // Assume the worst.
3709  UseCycle = RegNo + 2;
3710  }
3711 
3712  return UseCycle;
3713 }
3714 
3715 int
3716 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3717  const MCInstrDesc &UseMCID,
3718  unsigned UseClass,
3719  unsigned UseIdx, unsigned UseAlign) const {
3720  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3721  if (RegNo <= 0)
3722  return ItinData->getOperandCycle(UseClass, UseIdx);
3723 
3724  int UseCycle;
3725  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3726  UseCycle = RegNo / 2;
3727  if (UseCycle < 2)
3728  UseCycle = 2;
3729  // Read in E3.
3730  UseCycle += 2;
3731  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3732  UseCycle = (RegNo / 2);
3733  // If there is an odd number of registers or the access is not 64-bit aligned,
3734  // then it takes an extra AGU (Address Generation Unit) cycle.
3735  if ((RegNo % 2) || UseAlign < 8)
3736  ++UseCycle;
3737  } else {
3738  // Assume the worst.
3739  UseCycle = 1;
3740  }
3741  return UseCycle;
3742 }
3743 
3744 int
3745 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3746  const MCInstrDesc &DefMCID,
3747  unsigned DefIdx, unsigned DefAlign,
3748  const MCInstrDesc &UseMCID,
3749  unsigned UseIdx, unsigned UseAlign) const {
3750  unsigned DefClass = DefMCID.getSchedClass();
3751  unsigned UseClass = UseMCID.getSchedClass();
3752 
3753  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3754  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3755 
3756  // This may be a def / use of a variable_ops instruction, in which case the
3757  // operand latency might be determinable dynamically. Let the target try to
3758  // figure it out.
3759  int DefCycle = -1;
3760  bool LdmBypass = false;
3761  switch (DefMCID.getOpcode()) {
3762  default:
3763  DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3764  break;
3765 
3766  case ARM::VLDMDIA:
3767  case ARM::VLDMDIA_UPD:
3768  case ARM::VLDMDDB_UPD:
3769  case ARM::VLDMSIA:
3770  case ARM::VLDMSIA_UPD:
3771  case ARM::VLDMSDB_UPD:
3772  DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3773  break;
3774 
3775  case ARM::LDMIA_RET:
3776  case ARM::LDMIA:
3777  case ARM::LDMDA:
3778  case ARM::LDMDB:
3779  case ARM::LDMIB:
3780  case ARM::LDMIA_UPD:
3781  case ARM::LDMDA_UPD:
3782  case ARM::LDMDB_UPD:
3783  case ARM::LDMIB_UPD:
3784  case ARM::tLDMIA:
3785  case ARM::tLDMIA_UPD:
3786  case ARM::tPUSH:
3787  case ARM::t2LDMIA_RET:
3788  case ARM::t2LDMIA:
3789  case ARM::t2LDMDB:
3790  case ARM::t2LDMIA_UPD:
3791  case ARM::t2LDMDB_UPD:
3792  LdmBypass = true;
3793  DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3794  break;
3795  }
3796 
3797  if (DefCycle == -1)
3798  // We can't seem to determine the result latency of the def, so assume it's 2.
3799  DefCycle = 2;
3800 
3801  int UseCycle = -1;
3802  switch (UseMCID.getOpcode()) {
3803  default:
3804  UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3805  break;
3806 
3807  case ARM::VSTMDIA:
3808  case ARM::VSTMDIA_UPD:
3809  case ARM::VSTMDDB_UPD:
3810  case ARM::VSTMSIA:
3811  case ARM::VSTMSIA_UPD:
3812  case ARM::VSTMSDB_UPD:
3813  UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3814  break;
3815 
3816  case ARM::STMIA:
3817  case ARM::STMDA:
3818  case ARM::STMDB:
3819  case ARM::STMIB:
3820  case ARM::STMIA_UPD:
3821  case ARM::STMDA_UPD:
3822  case ARM::STMDB_UPD:
3823  case ARM::STMIB_UPD:
3824  case ARM::tSTMIA_UPD:
3825  case ARM::tPOP_RET:
3826  case ARM::tPOP:
3827  case ARM::t2STMIA:
3828  case ARM::t2STMDB:
3829  case ARM::t2STMIA_UPD:
3830  case ARM::t2STMDB_UPD:
3831  UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3832  break;
3833  }
3834 
3835  if (UseCycle == -1)
3836  // Assume it's read in the first stage.
3837  UseCycle = 1;
3838 
3839  UseCycle = DefCycle - UseCycle + 1;
3840  if (UseCycle > 0) {
3841  if (LdmBypass) {
3842  // It's a variable_ops instruction, so we can't use DefIdx here. Just use the
3843  // first def operand.
3844  if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3845  UseClass, UseIdx))
3846  --UseCycle;
3847  } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3848  UseClass, UseIdx)) {
3849  --UseCycle;
3850  }
3851  }
3852 
3853  return UseCycle;
3854 }
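// For example, if a def becomes available in cycle 4 (DefCycle) and the use
// reads its operand in cycle 2 (UseCycle), the returned latency is
// 4 - 2 + 1 = 3, minus one more when the itinerary has a pipeline forwarding
// path between the two scheduling classes.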
3855 
3856 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3857  const MachineInstr *MI, unsigned Reg,
3858  unsigned &DefIdx, unsigned &Dist) {
3859  Dist = 0;
3860 
3863  assert(II->isInsideBundle() && "Empty bundle?");
3864 
3865  int Idx = -1;
3866  while (II->isInsideBundle()) {
3867  Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3868  if (Idx != -1)
3869  break;
3870  --II;
3871  ++Dist;
3872  }
3873 
3874  assert(Idx != -1 && "Cannot find bundled definition!");
3875  DefIdx = Idx;
3876  return &*II;
3877 }
3878 
3879 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3880  const MachineInstr &MI, unsigned Reg,
3881  unsigned &UseIdx, unsigned &Dist) {
3882  Dist = 0;
3883 
3884  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
3885  assert(II->isInsideBundle() && "Empty bundle?");
3886  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
3887 
3888  // FIXME: This doesn't properly handle multiple uses.
3889  int Idx = -1;
3890  while (II != E && II->isInsideBundle()) {
3891  Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3892  if (Idx != -1)
3893  break;
3894  if (II->getOpcode() != ARM::t2IT)
3895  ++Dist;
3896  ++II;
3897  }
3898 
3899  if (Idx == -1) {
3900  Dist = 0;
3901  return nullptr;
3902  }
3903 
3904  UseIdx = Idx;
3905  return &*II;
3906 }
3907 
3908 /// Return the number of cycles to add to (or subtract from) the static
3909 /// itinerary based on the def opcode and alignment. The caller will ensure that
3910 /// adjusted latency is at least one cycle.
3911 static int adjustDefLatency(const ARMSubtarget &Subtarget,
3912  const MachineInstr &DefMI,
3913  const MCInstrDesc &DefMCID, unsigned DefAlign) {
3914  int Adjust = 0;
3915  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
3916  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3917  // variants are one cycle cheaper.
3918  switch (DefMCID.getOpcode()) {
3919  default: break;
3920  case ARM::LDRrs:
3921  case ARM::LDRBrs: {
3922  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3923  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3924  if (ShImm == 0 ||
3925  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3926  --Adjust;
3927  break;
3928  }
3929  case ARM::t2LDRs:
3930  case ARM::t2LDRBs:
3931  case ARM::t2LDRHs:
3932  case ARM::t2LDRSHs: {
3933  // Thumb2 mode: lsl only.
3934  unsigned ShAmt = DefMI.getOperand(3).getImm();
3935  if (ShAmt == 0 || ShAmt == 2)
3936  --Adjust;
3937  break;
3938  }
3939  }
3940  } else if (Subtarget.isSwift()) {
3941  // FIXME: Properly handle all of the latency adjustments for address
3942  // writeback.
3943  switch (DefMCID.getOpcode()) {
3944  default: break;
3945  case ARM::LDRrs:
3946  case ARM::LDRBrs: {
3947  unsigned ShOpVal = DefMI.getOperand(3).getImm();
3948  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3949  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3950  if (!isSub &&
3951  (ShImm == 0 ||
3952  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3953  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3954  Adjust -= 2;
3955  else if (!isSub &&
3956  ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3957  --Adjust;
3958  break;
3959  }
3960  case ARM::t2LDRs:
3961  case ARM::t2LDRBs:
3962  case ARM::t2LDRHs:
3963  case ARM::t2LDRSHs: {
3964  // Thumb2 mode: lsl only.
3965  unsigned ShAmt = DefMI.getOperand(3).getImm();
3966  if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
3967  Adjust -= 2;
3968  break;
3969  }
3970  }
3971  }
3972 
3973  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
3974  switch (DefMCID.getOpcode()) {
3975  default: break;
3976  case ARM::VLD1q8:
3977  case ARM::VLD1q16:
3978  case ARM::VLD1q32:
3979  case ARM::VLD1q64:
3980  case ARM::VLD1q8wb_fixed:
3981  case ARM::VLD1q16wb_fixed:
3982  case ARM::VLD1q32wb_fixed:
3983  case ARM::VLD1q64wb_fixed:
3984  case ARM::VLD1q8wb_register:
3985  case ARM::VLD1q16wb_register:
3986  case ARM::VLD1q32wb_register:
3987  case ARM::VLD1q64wb_register:
3988  case ARM::VLD2d8:
3989  case ARM::VLD2d16:
3990  case ARM::VLD2d32:
3991  case ARM::VLD2q8:
3992  case ARM::VLD2q16:
3993  case ARM::VLD2q32:
3994  case ARM::VLD2d8wb_fixed:
3995  case ARM::VLD2d16wb_fixed:
3996  case ARM::VLD2d32wb_fixed:
3997  case ARM::VLD2q8wb_fixed:
3998  case ARM::VLD2q16wb_fixed:
3999  case ARM::VLD2q32wb_fixed:
4000  case ARM::VLD2d8wb_register:
4001  case ARM::VLD2d16wb_register:
4002  case ARM::VLD2d32wb_register:
4003  case ARM::VLD2q8wb_register:
4004  case ARM::VLD2q16wb_register:
4005  case ARM::VLD2q32wb_register:
4006  case ARM::VLD3d8:
4007  case ARM::VLD3d16:
4008  case ARM::VLD3d32:
4009  case ARM::VLD1d64T:
4010  case ARM::VLD3d8_UPD:
4011  case ARM::VLD3d16_UPD:
4012  case ARM::VLD3d32_UPD:
4013  case ARM::VLD1d64Twb_fixed:
4014  case ARM::VLD1d64Twb_register:
4015  case ARM::VLD3q8_UPD:
4016  case ARM::VLD3q16_UPD:
4017  case ARM::VLD3q32_UPD:
4018  case ARM::VLD4d8:
4019  case ARM::VLD4d16:
4020  case ARM::VLD4d32:
4021  case ARM::VLD1d64Q:
4022  case ARM::VLD4d8_UPD:
4023  case ARM::VLD4d16_UPD:
4024  case ARM::VLD4d32_UPD:
4025  case ARM::VLD1d64Qwb_fixed:
4026  case ARM::VLD1d64Qwb_register:
4027  case ARM::VLD4q8_UPD:
4028  case ARM::VLD4q16_UPD:
4029  case ARM::VLD4q32_UPD:
4030  case ARM::VLD1DUPq8:
4031  case ARM::VLD1DUPq16:
4032  case ARM::VLD1DUPq32:
4033  case ARM::VLD1DUPq8wb_fixed:
4034  case ARM::VLD1DUPq16wb_fixed:
4035  case ARM::VLD1DUPq32wb_fixed:
4036  case ARM::VLD1DUPq8wb_register:
4037  case ARM::VLD1DUPq16wb_register:
4038  case ARM::VLD1DUPq32wb_register:
4039  case ARM::VLD2DUPd8:
4040  case ARM::VLD2DUPd16:
4041  case ARM::VLD2DUPd32:
4042  case ARM::VLD2DUPd8wb_fixed:
4043  case ARM::VLD2DUPd16wb_fixed:
4044  case ARM::VLD2DUPd32wb_fixed:
4045  case ARM::VLD2DUPd8wb_register:
4046  case ARM::VLD2DUPd16wb_register:
4047  case ARM::VLD2DUPd32wb_register:
4048  case ARM::VLD4DUPd8:
4049  case ARM::VLD4DUPd16:
4050  case ARM::VLD4DUPd32:
4051  case ARM::VLD4DUPd8_UPD:
4052  case ARM::VLD4DUPd16_UPD:
4053  case ARM::VLD4DUPd32_UPD:
4054  case ARM::VLD1LNd8:
4055  case ARM::VLD1LNd16:
4056  case ARM::VLD1LNd32:
4057  case ARM::VLD1LNd8_UPD:
4058  case ARM::VLD1LNd16_UPD:
4059  case ARM::VLD1LNd32_UPD:
4060  case ARM::VLD2LNd8:
4061  case ARM::VLD2LNd16:
4062  case ARM::VLD2LNd32:
4063  case ARM::VLD2LNq16:
4064  case ARM::VLD2LNq32:
4065  case ARM::VLD2LNd8_UPD:
4066  case ARM::VLD2LNd16_UPD:
4067  case ARM::VLD2LNd32_UPD:
4068  case ARM::VLD2LNq16_UPD:
4069  case ARM::VLD2LNq32_UPD:
4070  case ARM::VLD4LNd8:
4071  case ARM::VLD4LNd16:
4072  case ARM::VLD4LNd32:
4073  case ARM::VLD4LNq16:
4074  case ARM::VLD4LNq32:
4075  case ARM::VLD4LNd8_UPD:
4076  case ARM::VLD4LNd16_UPD:
4077  case ARM::VLD4LNd32_UPD:
4078  case ARM::VLD4LNq16_UPD:
4079  case ARM::VLD4LNq32_UPD:
4080  // If the address is not 64-bit aligned, the latencies of these
4081  // instructions increase by one.
4082  ++Adjust;
4083  break;
4084  }
4085  }
4086  return Adjust;
4087 }
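// For example, on Cortex-A8 an LDRrs addressed as [r1, r2] (no shift) gets
// Adjust = -1; on Swift the same load with an lsl #2 offset gets Adjust = -2;
// and a VLD2d8 whose address is only 4-byte aligned on a core that checks
// VLDn access alignment gets Adjust = +1.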
4088 
4089 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4090  const MachineInstr &DefMI,
4091  unsigned DefIdx,
4092  const MachineInstr &UseMI,
4093  unsigned UseIdx) const {
4094  // No operand latency. The caller may fall back to getInstrLatency.
4095  if (!ItinData || ItinData->isEmpty())
4096  return -1;
4097 
4098  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4099  unsigned Reg = DefMO.getReg();
4100 
4101  const MachineInstr *ResolvedDefMI = &DefMI;
4102  unsigned DefAdj = 0;
4103  if (DefMI.isBundle())
4104  ResolvedDefMI =
4105  getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4106  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4107  ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4108  return 1;
4109  }
4110 
4111  const MachineInstr *ResolvedUseMI = &UseMI;
4112  unsigned UseAdj = 0;
4113  if (UseMI.isBundle()) {
4114  ResolvedUseMI =
4115  getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4116  if (!ResolvedUseMI)
4117  return -1;
4118  }
4119 
4120  return getOperandLatencyImpl(
4121  ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4122  Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4123 }
4124 
4125 int ARMBaseInstrInfo::getOperandLatencyImpl(
4126  const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4127  unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4128  const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4129  unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4130  if (Reg == ARM::CPSR) {
4131  if (DefMI.getOpcode() == ARM::FMSTAT) {
4132  // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4133  return Subtarget.isLikeA9() ? 1 : 20;
4134  }
4135 
4136  // CPSR set and branch can be paired in the same cycle.
4137  if (UseMI.isBranch())
4138  return 0;
4139 
4140  // Otherwise it takes the instruction latency (generally one).
4141  unsigned Latency = getInstrLatency(ItinData, DefMI);
4142 
4143  // For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close to
4144  // its uses. Instructions which are otherwise scheduled between them may
4145  // incur a code size penalty (not able to use the CPSR setting 16-bit
4146  // instructions).
4147  if (Latency > 0 && Subtarget.isThumb2()) {
4148  const MachineFunction *MF = DefMI.getParent()->getParent();
4149  // FIXME: Use Function::optForSize().
4150  if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4151  --Latency;
4152  }
4153  return Latency;
4154  }
4155 
4156  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4157  return -1;
4158 
4159  unsigned DefAlign = DefMI.hasOneMemOperand()
4160  ? (*DefMI.memoperands_begin())->getAlignment()
4161  : 0;
4162  unsigned UseAlign = UseMI.hasOneMemOperand()
4163  ? (*UseMI.memoperands_begin())->getAlignment()
4164  : 0;
4165 
4166  // Get the itinerary's latency if possible, and handle variable_ops.
4167  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
4168  UseIdx, UseAlign);
4169  // Unable to find operand latency. The caller may resort to getInstrLatency.
4170  if (Latency < 0)
4171  return Latency;
4172 
4173  // Adjust for IT block position.
4174  int Adj = DefAdj + UseAdj;
4175 
4176  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4177  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4178  if (Adj >= 0 || (int)Latency > -Adj) {
4179  return Latency + Adj;
4180  }
4181  // Return the itinerary latency, which may be zero but not less than zero.
4182  return Latency;
4183 }
4184 
4185 int
4186 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4187  SDNode *DefNode, unsigned DefIdx,
4188  SDNode *UseNode, unsigned UseIdx) const {
4189  if (!DefNode->isMachineOpcode())
4190  return 1;
4191 
4192  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4193 
4194  if (isZeroCost(DefMCID.Opcode))
4195  return 0;
4196 
4197  if (!ItinData || ItinData->isEmpty())
4198  return DefMCID.mayLoad() ? 3 : 1;
4199 
4200  if (!UseNode->isMachineOpcode()) {
4201  int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4202  int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4203  int Threshold = 1 + Adj;
4204  return Latency <= Threshold ? 1 : Latency - Adj;
4205  }
4206 
4207  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4208  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
4209  unsigned DefAlign = !DefMN->memoperands_empty()
4210  ? (*DefMN->memoperands_begin())->getAlignment() : 0;
4211  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
4212  unsigned UseAlign = !UseMN->memoperands_empty()
4213  ? (*UseMN->memoperands_begin())->getAlignment() : 0;
4214  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4215  UseMCID, UseIdx, UseAlign);
4216 
4217  if (Latency > 1 &&
4218  (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4219  Subtarget.isCortexA7())) {
4220  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4221  // variants are one cycle cheaper.
4222  switch (DefMCID.getOpcode()) {
4223  default: break;
4224  case ARM::LDRrs:
4225  case ARM::LDRBrs: {
4226  unsigned ShOpVal =
4227  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4228  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4229  if (ShImm == 0 ||
4230  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4231  --Latency;
4232  break;
4233  }
4234  case ARM::t2LDRs:
4235  case ARM::t2LDRBs:
4236  case ARM::t2LDRHs:
4237  case ARM::t2LDRSHs: {
4238  // Thumb2 mode: lsl only.
4239  unsigned ShAmt =
4240  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4241  if (ShAmt == 0 || ShAmt == 2)
4242  --Latency;
4243  break;
4244  }
4245  }
4246  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4247  // FIXME: Properly handle all of the latency adjustments for address
4248  // writeback.
4249  switch (DefMCID.getOpcode()) {
4250  default: break;
4251  case ARM::LDRrs:
4252  case ARM::LDRBrs: {
4253  unsigned ShOpVal =
4254  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4255  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4256  if (ShImm == 0 ||
4257  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4258  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4259  Latency -= 2;
4260  else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4261  --Latency;
4262  break;
4263  }
4264  case ARM::t2LDRs:
4265  case ARM::t2LDRBs:
4266  case ARM::t2LDRHs:
4267  case ARM::t2LDRSHs:
4268  // Thumb2 mode: lsl 0-3 only.
4269  Latency -= 2;
4270  break;
4271  }
4272  }
4273 
4274  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4275  switch (DefMCID.getOpcode()) {
4276  default: break;
4277  case ARM::VLD1q8:
4278  case ARM::VLD1q16:
4279  case ARM::VLD1q32:
4280  case ARM::VLD1q64:
4281  case ARM::VLD1q8wb_register:
4282  case ARM::VLD1q16wb_register:
4283  case ARM::VLD1q32wb_register:
4284  case ARM::VLD1q64wb_register:
4285  case ARM::VLD1q8wb_fixed:
4286  case ARM::VLD1q16wb_fixed:
4287  case ARM::VLD1q32wb_fixed:
4288  case ARM::VLD1q64wb_fixed:
4289  case ARM::VLD2d8:
4290  case ARM::VLD2d16:
4291  case ARM::VLD2d32:
4292  case ARM::VLD2q8Pseudo:
4293  case ARM::VLD2q16Pseudo:
4294  case ARM::VLD2q32Pseudo:
4295  case ARM::VLD2d8wb_fixed:
4296  case ARM::VLD2d16wb_fixed:
4297  case ARM::VLD2d32wb_fixed:
4298  case ARM::VLD2q8PseudoWB_fixed:
4299  case ARM::VLD2q16PseudoWB_fixed:
4300  case ARM::VLD2q32PseudoWB_fixed:
4301  case ARM::VLD2d8wb_register:
4302  case ARM::VLD2d16wb_register:
4303  case ARM::VLD2d32wb_register:
4304  case ARM::VLD2q8PseudoWB_register:
4305  case ARM::VLD2q16PseudoWB_register:
4306  case ARM::VLD2q32PseudoWB_register:
4307  case ARM::VLD3d8Pseudo:
4308  case ARM::VLD3d16Pseudo:
4309  case ARM::VLD3d32Pseudo:
4310  case ARM::VLD1d8TPseudo:
4311  case ARM::VLD1d16TPseudo:
4312  case ARM::VLD1d32TPseudo:
4313  case ARM::VLD1d64TPseudo:
4314  case ARM::VLD1d64TPseudoWB_fixed:
4315  case ARM::VLD1d64TPseudoWB_register:
4316  case ARM::VLD3d8Pseudo_UPD:
4317  case ARM::VLD3d16Pseudo_UPD:
4318  case ARM::VLD3d32Pseudo_UPD:
4319  case ARM::VLD3q8Pseudo_UPD:
4320  case ARM::VLD3q16Pseudo_UPD:
4321  case ARM::VLD3q32Pseudo_UPD:
4322  case ARM::VLD3q8oddPseudo:
4323  case ARM::VLD3q16oddPseudo:
4324  case ARM::VLD3q32oddPseudo:
4325  case ARM::VLD3q8oddPseudo_UPD:
4326  case ARM::VLD3q16oddPseudo_UPD:
4327  case ARM::VLD3q32oddPseudo_UPD:
4328  case ARM::VLD4d8Pseudo:
4329  case ARM::VLD4d16Pseudo:
4330  case ARM::VLD4d32Pseudo:
4331  case ARM::VLD1d8QPseudo:
4332  case ARM::VLD1d16QPseudo:
4333  case ARM::VLD1d32QPseudo:
4334  case ARM::VLD1d64QPseudo:
4335  case ARM::VLD1d64QPseudoWB_fixed:
4336  case ARM::VLD1d64QPseudoWB_register:
4337  case ARM::VLD1q8HighQPseudo:
4338  case ARM::VLD1q8LowQPseudo_UPD:
4339  case ARM::VLD1q8HighTPseudo:
4340  case ARM::VLD1q8LowTPseudo_UPD:
4341  case ARM::VLD1q16HighQPseudo:
4342  case ARM::VLD1q16LowQPseudo_UPD:
4343  case ARM::VLD1q16HighTPseudo:
4344  case ARM::VLD1q16LowTPseudo_UPD:
4345  case ARM::VLD1q32HighQPseudo:
4346  case ARM::VLD1q32LowQPseudo_UPD:
4347  case ARM::VLD1q32HighTPseudo:
4348  case ARM::VLD1q32LowTPseudo_UPD:
4349  case ARM::VLD1q64HighQPseudo:
4350  case ARM::VLD1q64LowQPseudo_UPD:
4351  case ARM::VLD1q64HighTPseudo:
4352  case ARM::VLD1q64LowTPseudo_UPD:
4353  case ARM::VLD4d8Pseudo_UPD:
4354  case ARM::VLD4d16Pseudo_UPD:
4355  case ARM::VLD4d32Pseudo_UPD:
4356  case ARM::VLD4q8Pseudo_UPD:
4357  case ARM::VLD4q16Pseudo_UPD:
4358  case ARM::VLD4q32Pseudo_UPD:
4359  case ARM::VLD4q8oddPseudo:
4360  case ARM::VLD4q16oddPseudo:
4361  case ARM::VLD4q32oddPseudo:
4362  case ARM::VLD4q8oddPseudo_UPD:
4363  case ARM::VLD4q16oddPseudo_UPD:
4364  case ARM::VLD4q32oddPseudo_UPD:
4365  case ARM::VLD1DUPq8:
4366  case ARM::VLD1DUPq16:
4367  case ARM::VLD1DUPq32:
4368  case ARM::VLD1DUPq8wb_fixed:
4369  case ARM::VLD1DUPq16wb_fixed:
4370  case ARM::VLD1DUPq32wb_fixed:
4371  case ARM::VLD1DUPq8wb_register:
4372  case ARM::VLD1DUPq16wb_register:
4373  case ARM::VLD1DUPq32wb_register:
4374  case ARM::VLD2DUPd8:
4375  case ARM::VLD2DUPd16:
4376  case ARM::VLD2DUPd32:
4377  case ARM::VLD2DUPd8wb_fixed:
4378  case ARM::VLD2DUPd16wb_fixed:
4379  case ARM::VLD2DUPd32wb_fixed:
4380  case ARM::VLD2DUPd8wb_register:
4381  case ARM::VLD2DUPd16wb_register:
4382  case ARM::VLD2DUPd32wb_register:
4383  case ARM::VLD2DUPq8EvenPseudo:
4384  case ARM::VLD2DUPq8OddPseudo:
4385  case ARM::VLD2DUPq16EvenPseudo:
4386  case ARM::VLD2DUPq16OddPseudo:
4387  case ARM::VLD2DUPq32EvenPseudo:
4388  case ARM::VLD2DUPq32OddPseudo:
4389  case ARM::VLD3DUPq8EvenPseudo:
4390  case ARM::VLD3DUPq8OddPseudo:
4391  case ARM::VLD3DUPq16EvenPseudo:
4392  case ARM::VLD3DUPq16OddPseudo:
4393  case ARM::VLD3DUPq32EvenPseudo:
4394  case ARM::VLD3DUPq32OddPseudo:
4395  case ARM::VLD4DUPd8Pseudo:
4396  case ARM::VLD4DUPd16Pseudo:
4397  case ARM::VLD4DUPd32Pseudo:
4398  case ARM::VLD4DUPd8Pseudo_UPD:
4399  case ARM::VLD4DUPd16Pseudo_UPD:
4400  case ARM::VLD4DUPd32Pseudo_UPD:
4401  case ARM::VLD4DUPq8EvenPseudo:
4402  case ARM::VLD4DUPq8OddPseudo:
4403  case ARM::VLD4DUPq16EvenPseudo:
4404  case ARM::VLD4DUPq16OddPseudo:
4405  case ARM::VLD4DUPq32EvenPseudo:
4406  case ARM::VLD4DUPq32OddPseudo:
4407  case ARM::VLD1LNq8Pseudo:
4408  case ARM::VLD1LNq16Pseudo:
4409  case ARM::VLD1LNq32Pseudo:
4410  case ARM::VLD1LNq8Pseudo_UPD:
4411  case ARM::VLD1LNq16Pseudo_UPD:
4412  case ARM::VLD1LNq32Pseudo_UPD:
4413  case ARM::VLD2LNd8Pseudo:
4414  case ARM::VLD2LNd16Pseudo:
4415  case ARM::VLD2LNd32Pseudo:
4416  case ARM::VLD2LNq16Pseudo:
4417  case ARM::VLD2LNq32Pseudo:
4418  case ARM::VLD2LNd8Pseudo_UPD:
4419  case ARM::VLD2LNd16Pseudo_UPD:
4420  case ARM::VLD2LNd32Pseudo_UPD:
4421  case ARM::VLD2LNq16Pseudo_UPD:
4422  case ARM::VLD2LNq32Pseudo_UPD:
4423  case ARM::VLD4LNd8Pseudo:
4424  case ARM::VLD4LNd16Pseudo:
4425  case ARM::VLD4LNd32Pseudo:
4426  case ARM::VLD4LNq16Pseudo:
4427  case ARM::VLD4LNq32Pseudo:
4428  case ARM::VLD4LNd8Pseudo_UPD:
4429  case ARM::VLD4LNd16Pseudo_UPD:
4430  case ARM::VLD4LNd32Pseudo_UPD:
4431  case ARM::VLD4LNq16Pseudo_UPD:
4432  case ARM::VLD4LNq32Pseudo_UPD:
4433  // If the address is not 64-bit aligned, the latencies of these
4434  // instructions increase by one.
4435  ++Latency;
4436  break;
4437  }
4438 
4439  return Latency;
4440 }
4441 
4442 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4443  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4444  MI.isImplicitDef())
4445  return 0;
4446 
4447  if (MI.isBundle())
4448  return 0;
4449 
4450  const MCInstrDesc &MCID = MI.getDesc();
4451 
4452  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4453  !Subtarget.cheapPredicableCPSRDef())) {
4454  // When predicated, CPSR is an additional source operand for CPSR updating
4455  // instructions, which apparently increases their latencies.
4456  return 1;
4457  }
4458  return 0;
4459 }
4460 
4461 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4462  const MachineInstr &MI,
4463  unsigned *PredCost) const {
4464  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4465  MI.isImplicitDef())
4466  return 1;
4467 
4468  // An instruction scheduler typically runs on unbundled instructions; however,
4469  // other passes may query the latency of a bundled instruction.
4470  if (MI.isBundle()) {
4471  unsigned Latency = 0;
4474  while (++I != E && I->isInsideBundle()) {
4475  if (I->getOpcode() != ARM::t2IT)
4476  Latency += getInstrLatency(ItinData, *I, PredCost);
4477  }
4478  return Latency;
4479  }
4480 
4481  const MCInstrDesc &MCID = MI.getDesc();
4482  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4483  !Subtarget.cheapPredicableCPSRDef()))) {
4484  // When predicated, CPSR is an additional source operand for CPSR updating
4485  // instructions, which apparently increases their latencies.
4486  *PredCost = 1;
4487  }
4488  // Be sure to call getStageLatency for an empty itinerary in case it has a
4489  // valid MinLatency property.
4490  if (!ItinData)
4491  return MI.mayLoad() ? 3 : 1;
4492 
4493  unsigned Class = MCID.getSchedClass();
4494 
4495  // For instructions with variable uops, use uops as latency.
4496  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4497  return getNumMicroOps(ItinData, MI);
4498 
4499  // For the common case, fall back on the itinerary's latency.
4500  unsigned Latency = ItinData->getStageLatency(Class);
4501 
4502  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4503  unsigned DefAlign =
4504  MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
4505  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4506  if (Adj >= 0 || (int)Latency > -Adj) {
4507  return Latency + Adj;
4508  }
4509  return Latency;
4510 }
4511 
4512 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4513  SDNode *Node) const {
4514  if (!Node->isMachineOpcode())
4515  return 1;
4516 
4517  if (!ItinData || ItinData->isEmpty())
4518  return 1;
4519 
4520  unsigned Opcode = Node->getMachineOpcode();
4521  switch (Opcode) {
4522  default:
4523  return ItinData->getStageLatency(get(Opcode).getSchedClass());
4524  case ARM::VLDMQIA:
4525  case ARM::VSTMQIA:
4526  return 2;
4527  }
4528 }
4529 
4530 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4531  const MachineRegisterInfo *MRI,
4532  const MachineInstr &DefMI,
4533  unsigned DefIdx,
4534  const MachineInstr &UseMI,
4535  unsigned UseIdx) const {
4536  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4537  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4538  if (Subtarget.nonpipelinedVFP() &&
4539  (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4540  return true;
4541 
4542  // Hoist VFP / NEON instructions with 4 or higher latency.
4543  unsigned Latency =
4544  SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4545  if (Latency <= 3)
4546  return false;
4547  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4548  UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4549 }
4550 
4551 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4552  const MachineInstr &DefMI,
4553  unsigned DefIdx) const {
4554  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4555  if (!ItinData || ItinData->isEmpty())
4556  return false;
4557 
4558  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4559  if (DDomain == ARMII::DomainGeneral) {
4560  unsigned DefClass = DefMI.getDesc().getSchedClass();
4561  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4562  return (DefCycle != -1 && DefCycle <= 2);
4563  }
4564  return false;
4565 }
4566 
4567 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4568  StringRef &ErrInfo) const {
4569  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4570  ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4571  return false;
4572  }
4573  if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4574  // Make sure we don't generate a lo-lo mov that isn't supported.
4575  if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4576  !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4577  ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4578  return false;
4579  }
4580  }
4581  if (MI.getOpcode() == ARM::tPUSH ||
4582  MI.getOpcode() == ARM::tPOP ||
4583  MI.getOpcode() == ARM::tPOP_RET) {
4584  for (int i = 2, e = MI.getNumOperands(); i < e; ++i) {
4585  if (MI.getOperand(i).isImplicit() ||
4586  !MI.getOperand(i).isReg())
4587  continue;
4588  unsigned Reg = MI.getOperand(i).getReg();
4589  if (Reg < ARM::R0 || Reg > ARM::R7) {
4590  if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4591  !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4592  ErrInfo = "Unsupported register in Thumb1 push/pop";
4593  return false;
4594  }
4595  }
4596  }
4597  }
4598  return true;
4599 }
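// For example, tPUSH {r4, lr} and tPOP_RET {r4, pc} verify cleanly, while a
// tPUSH {r8} is rejected with "Unsupported register in Thumb1 push/pop".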
4600 
4601 // LoadStackGuard has so far only been implemented for MachO. A different code
4602 // sequence is needed for other targets.
4603 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4604  unsigned LoadImmOpc,
4605  unsigned LoadOpc) const {
4606  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4607  "ROPI/RWPI not currently supported with stack guard");
4608 
4609  MachineBasicBlock &MBB = *MI->getParent();
4610  DebugLoc DL = MI->getDebugLoc();
4611  unsigned Reg = MI->getOperand(0).getReg();
4612  const GlobalValue *GV =
4613  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4614  MachineInstrBuilder MIB;
4615 
4616  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4617  .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
4618 
4619  if (Subtarget.isGVIndirectSymbol(GV)) {
4620  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4621  MIB.addReg(Reg, RegState::Kill).addImm(0);
4622  auto Flags = MachineMemOperand::MOLoad |
4623  MachineMemOperand::MODereferenceable |
4624  MachineMemOperand::MOInvariant;
4625  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4626  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
4627  MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4628  }
4629 
4630  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4631  MIB.addReg(Reg, RegState::Kill)
4632  .addImm(0)
4633  .cloneMemRefs(*MI)
4634  .add(predOps(ARMCC::AL));
4635 }
4636 
4637 bool
4638 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4639  unsigned &AddSubOpc,
4640  bool &NegAcc, bool &HasLane) const {
4641  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4642  if (I == MLxEntryMap.end())
4643  return false;
4644 
4645  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4646  MulOpc = Entry.MulOpc;
4647  AddSubOpc = Entry.AddSubOpc;
4648  NegAcc = Entry.NegAcc;
4649  HasLane = Entry.HasLane;
4650  return true;
4651 }
4652 
4653 //===----------------------------------------------------------------------===//
4654 // Execution domains.
4655 //===----------------------------------------------------------------------===//
4656 //
4657 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4658 // and some can go down both. The vmov instructions go down the VFP pipeline,
4659 // but they can be changed to vorr equivalents that are executed by the NEON
4660 // pipeline.
4661 //
4662 // We use the following execution domain numbering:
4663 //
4664 enum ARMExeDomain {
4665  ExeGeneric = 0,
4666  ExeVFP = 1,
4667  ExeNEON = 2
4668 };
4669 
4670 //
4671 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4672 //
4673 std::pair<uint16_t, uint16_t>
4674 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4675  // If we don't have access to NEON instructions then we won't be able
4676  // to swizzle anything to the NEON domain. Check to make sure.
4677  if (Subtarget.hasNEON()) {
4678  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4679  // if they are not predicated.
4680  if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4681  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4682 
4683  // CortexA9 is particularly picky about mixing the two and wants these
4684  // converted.
4685  if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4686  (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4687  MI.getOpcode() == ARM::VMOVS))
4688  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4689  }
4690  // No other instructions can be swizzled, so just determine their domain.
4691  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4692 
4693  if (Domain & ARMII::DomainNEON)
4694  return std::make_pair(ExeNEON, 0);
4695 
4696  // Certain instructions can go either way on Cortex-A8.
4697  // Treat them as NEON instructions.
4698  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4699  return std::make_pair(ExeNEON, 0);
4700 
4701  if (Domain & ARMII::DomainVFP)
4702  return std::make_pair(ExeVFP, 0);
4703 
4704  return std::make_pair(ExeGeneric, 0);
4705 }
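// For example, an unpredicated VMOVD reports domain ExeVFP with the
// equivalence mask (1 << ExeVFP) | (1 << ExeNEON), so the execution-domain
// pass may later rewrite it to a VORRd; an ordinary NEON instruction reports
// (ExeNEON, 0), i.e. it cannot be moved to another pipeline.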
4706 
4707 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4708  unsigned SReg, unsigned &Lane) {
4709  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4710  Lane = 0;
4711 
4712  if (DReg != ARM::NoRegister)
4713  return DReg;
4714 
4715  Lane = 1;
4716  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4717 
4718  assert(DReg && "S-register with no D super-register?");
4719  return DReg;
4720 }
4721 
4722 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4723 /// set ImplicitSReg to a register number that must be marked as implicit-use or
4724 /// zero if no register needs to be defined as implicit-use.
4725 ///
4726 /// If the function cannot determine if an SPR should be marked implicit use or
4727 /// not, it returns false.
4728 ///
4729 /// This function handles cases where an instruction is being modified from taking
4730 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4731 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4732 /// lane of the DPR).
4733 ///
4734 /// If the other SPR is defined, an implicit-use of it should be added. Otherwise
4735 /// (including the case where the DPR itself is defined), it should not.
4736 ///
4737 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4738  MachineInstr &MI, unsigned DReg,
4739  unsigned Lane, unsigned &ImplicitSReg) {
4740  // If the DPR is defined or used already, the other SPR lane will be chained
4741  // correctly, so there is nothing to be done.
4742  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4743  ImplicitSReg = 0;
4744  return true;
4745  }
4746 
4747  // Otherwise we need to go searching to see if the SPR is set explicitly.
4748  ImplicitSReg = TRI->getSubReg(DReg,
4749  (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4750  MachineBasicBlock::LivenessQueryResult LQR =
4751  MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4752 
4753  if (LQR == MachineBasicBlock::LQR_Live)
4754  return true;
4755  else if (LQR == MachineBasicBlock::LQR_Unknown)
4756  return false;
4757 
4758  // If the register is known not to be live, there is no need to add an
4759  // implicit-use.
4760  ImplicitSReg = 0;
4761  return true;
4762 }
4763 
4764 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4765  unsigned Domain) const {
4766  unsigned DstReg, SrcReg, DReg;
4767  unsigned Lane;
4768  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4769  const TargetRegisterInfo *TRI = &getRegisterInfo();
4770  switch (MI.getOpcode()) {
4771  default:
4772  llvm_unreachable("cannot handle opcode!");
4773  break;
4774  case ARM::VMOVD:
4775  if (Domain != ExeNEON)
4776  break;
4777 
4778  // Zap the predicate operands.
4779  assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4780 
4781  // Make sure we've got NEON instructions.
4782  assert(Subtarget.hasNEON() && "VORRd requires NEON");
4783 
4784  // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4785  DstReg = MI.getOperand(0).getReg();
4786  SrcReg = MI.getOperand(1).getReg();
4787 
4788  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4789  MI.RemoveOperand(i - 1);
4790 
4791  // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4792  MI.setDesc(get(ARM::VORRd));
4793  MIB.addReg(DstReg, RegState::Define)
4794  .addReg(SrcReg)
4795  .addReg(SrcReg)
4796  .add(predOps(ARMCC::AL));
4797  break;
4798  case ARM::VMOVRS:
4799  if (Domain != ExeNEON)
4800  break;
4801  assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
4802 
4803  // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
4804  DstReg = MI.getOperand(0).getReg();
4805  SrcReg = MI.getOperand(1).getReg();
4806 
4807  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4808  MI.RemoveOperand(i - 1);
4809 
4810  DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
4811 
4812  // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
4813  // Note that DSrc has been widened and the other lane may be undef, which
4814  // contaminates the entire register.
4815  MI.setDesc(get(ARM::VGETLNi32));
4816  MIB.addReg(DstReg, RegState::Define)
4817  .addReg(DReg, RegState::Undef)
4818  .addImm(Lane)
4819  .add(predOps(ARMCC::AL));
4820 
4821  // The old source should be an implicit use, otherwise we might think it
4822  // was dead before here.
4823  MIB.addReg(SrcReg, RegState::Implicit);
4824  break;
4825  case ARM::VMOVSR: {
4826  if (Domain != ExeNEON)
4827  break;
4828  assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
4829 
4830  // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
4831  DstReg = MI.getOperand(0).getReg();
4832  SrcReg = MI.getOperand(1).getReg();
4833 
4834  DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
4835 
4836  unsigned ImplicitSReg;
4837  if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
4838  break;
4839 
4840  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4841  MI.RemoveOperand(i - 1);
4842 
4843  // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
4844  // Again DDst may be undefined at the beginning of this instruction.
4845  MI.setDesc(get(ARM::VSETLNi32));
4846  MIB.addReg(DReg, RegState::Define)
4847  .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
4848  .addReg(SrcReg)
4849  .addImm(Lane)
4850  .add(predOps(ARMCC::AL));
4851 
4852  // The narrower destination must be marked as set to keep previous chains
4853  // in place.
4854  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4855  if (ImplicitSReg != 0)
4856  MIB.addReg(ImplicitSReg, RegState::Implicit);
4857  break;
4858  }
4859  case ARM::VMOVS: {
4860  if (Domain != ExeNEON)
4861  break;
4862 
4863  // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4864  DstReg = MI.getOperand(0).getReg();
4865  SrcReg = MI.getOperand(1).getReg();
4866 
4867  unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4868  DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4869  DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4870 
4871  unsigned ImplicitSReg;
4872  if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4873  break;
4874 
4875  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4876  MI.RemoveOperand(i - 1);
4877 
4878  if (DSrc == DDst) {
4879  // Destination can be:
4880  // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4881  MI.setDesc(get(ARM::VDUPLN32d));
4882  MIB.addReg(DDst, RegState::Define)
4883  .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
4884  .addImm(SrcLane)
4885  .add(predOps(ARMCC::AL));
4886 
4887  // Neither the source nor the destination is naturally represented any
4888  // more, so add them in manually.
4889  MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4890  MIB.addReg(SrcReg, RegState::Implicit);
4891  if (ImplicitSReg != 0)
4892  MIB.addReg(ImplicitSReg, RegState::Implicit);
4893  break;
4894  }
4895 
4896  // In general there's no single instruction that can perform an S <-> S
4897  // move in NEON space, but a pair of VEXT instructions *can* do the
4898  // job. It turns out that the VEXTs needed will only use DSrc once, with
4899  // the position based purely on the combination of lane-0 and lane-1
4900  // involved. For example
4901  // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
4902  // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
4903  // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
4904  // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
4905  //
4906  // Pattern of the MachineInstrs is:
4907  // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4908  MachineInstrBuilder NewMIB;
4909  NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
4910  DDst);
4911 
4912  // On the first instruction, both DSrc and DDst may be undef if present.
4913  // Specifically when the original instruction didn't have them as an
4914  // <imp-use>.
4915  unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
4916  bool CurUndef = !MI.readsRegister(CurReg, TRI);
4917  NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4918 
4919  CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
4920  CurUndef = !MI.readsRegister(CurReg, TRI);
4921  NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
4922  .addImm(1)
4923  .add(predOps(ARMCC::AL));
4924 
4925  if (SrcLane == DstLane)
4926  NewMIB.addReg(SrcReg, RegState::Implicit);
4927 
4928  MI.setDesc(get(ARM::VEXTd32));
4929  MIB.addReg(DDst, RegState::Define);
4930 
4931  // On the second instruction, DDst has definitely been defined above, so
4932  // it is not undef. DSrc, if present, can be undef as above.
4933  CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
4934  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4935  MIB.addReg(CurReg, getUndefRegState(CurUndef));
4936 
4937  CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
4938  CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4939  MIB.addReg(CurReg, getUndefRegState(CurUndef))
4940  .addImm(1)
4941  .add(predOps(ARMCC::AL));
4942 
4943  if (SrcLane != DstLane)
4944  MIB.addReg(SrcReg, RegState::Implicit);
4945 
4946  // As before, the original destination is no longer represented, add it
4947  // implicitly.
4948  MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4949  if (ImplicitSReg != 0)
4950  MIB.addReg(ImplicitSReg, RegState::Implicit);
4951  break;
4952  }
4953  }
4954 }
4955 
4956 //===----------------------------------------------------------------------===//
4957 // Partial register updates
4958 //===----------------------------------------------------------------------===//
4959 //
4960 // Swift renames NEON registers with 64-bit granularity. That means any
4961 // instruction writing an S-reg implicitly reads the containing D-reg. The
4962 // problem is mostly avoided by translating f32 operations to v2f32 operations
4963 // on D-registers, but f32 loads are still a problem.
4964 //
4965 // These instructions can load an f32 into a NEON register:
4966 //
4967 // VLDRS - Only writes S, partial D update.
4968 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4969 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4970 //
4971 // FCONSTD can be used as a dependency-breaking instruction.
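// For example, on Swift a "vldr s0, [r0]" writes only half of d0 and so
// carries a false dependence on the previous writer of d0. If that writer is
// within the partial-update clearance, breakPartialRegDependency() below
// inserts "vmov.f64 d0, #0.5" (FCONSTD) ahead of the load to sever the
// dependence.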
4972 unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
4973  const MachineInstr &MI, unsigned OpNum,
4974  const TargetRegisterInfo *TRI) const {
4975  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
4976  if (!PartialUpdateClearance)
4977  return 0;
4978 
4979  assert(TRI && "Need TRI instance");
4980 
4981  const MachineOperand &MO = MI.getOperand(OpNum);
4982  if (MO.readsReg())
4983  return 0;
4984  unsigned Reg = MO.getReg();
4985  int UseOp = -1;
4986 
4987  switch (MI.getOpcode()) {
4988  // Normal instructions writing only an S-register.
4989  case ARM::VLDRS:
4990  case ARM::FCONSTS:
4991  case ARM::VMOVSR:
4992  case ARM::VMOVv8i8:
4993  case ARM::VMOVv4i16:
4994  case ARM::VMOVv2i32:
4995  case ARM::VMOVv2f32:
4996  case ARM::VMOVv1i64:
4997  UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
4998  break;
4999 
5000  // Explicitly reads the dependency.
5001  case ARM::VLD1LNd32:
5002  UseOp = 3;
5003  break;
5004  default:
5005  return 0;
5006  }
5007 
5008  // If this instruction actually reads a value from Reg, there is no unwanted
5009  // dependency.
5010  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5011  return 0;
5012 
5013  // We must be able to clobber the whole D-reg.
5014  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
5015  // Virtual register must be a def undef foo:ssub_0 operand.
5016  if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5017  return 0;
5018  } else if (ARM::SPRRegClass.contains(Reg)) {
5019  // Physical register: MI must define the full D-reg.
5020  unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
5021  &ARM::DPRRegClass);
5022  if (!DReg || !MI.definesRegister(DReg, TRI))
5023  return 0;
5024  }
5025 
5026  // MI has an unwanted D-register dependency.
5027  // Avoid defs in the previous N instructions.
5028  return PartialUpdateClearance;
5029 }
5030 
5031 // Break a partial register dependency after getPartialRegUpdateClearance
5032 // returned non-zero.
5033 void ARMBaseInstrInfo::breakPartialRegDependency(
5034  MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5035  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5036  assert(TRI && "Need TRI instance");
5037 
5038  const MachineOperand &MO = MI.getOperand(OpNum);
5039  unsigned Reg = MO.getReg();
5041  "Can't break virtual register dependencies.");
5042  unsigned DReg = Reg;
5043 
5044  // If MI defines an S-reg, find the corresponding D super-register.
5045  if (ARM::SPRRegClass.contains(Reg)) {
5046  DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5047  assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5048  }
5049 
5050  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5051  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5052 
5053  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5054  // the full D-register by loading the same value to both lanes. The
5055  // instruction is micro-coded with 2 uops, so don't do this until we can
5056  // properly schedule micro-coded instructions. The dispatcher stalls cause
5057  // too big regressions.
5058 
5059  // Insert the dependency-breaking FCONSTD before MI.
5060  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5061  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
5062  .addImm(96)
5063  .add(predOps(ARMCC::AL));
5064  MI.addRegisterKilled(DReg, TRI, true);
5065 }
5066 
5067 bool ARMBaseInstrInfo::hasNOP() const {
5068  return Subtarget.getFeatureBits()[ARM::HasV6KOps];
5069 }
5070 
5071 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
5072  if (MI->getNumOperands() < 4)
5073  return true;
5074  unsigned ShOpVal = MI->getOperand(3).getImm();
5075  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5076  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5077  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5078  ((ShImm == 1 || ShImm == 2) &&
5079  ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5080  return true;
5081 
5082  return false;
5083 }
5084 
5085 bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5086  const MachineInstr &MI, unsigned DefIdx,
5087  SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5088  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5089  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5090 
5091  switch (MI.getOpcode()) {
5092  case ARM::VMOVDRR:
5093  // dX = VMOVDRR rY, rZ
5094  // is the same as:
5095  // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5096  // Populate the InputRegs accordingly.
5097  // rY
5098  const MachineOperand *MOReg = &MI.getOperand(1);
5099  if (!MOReg->isUndef())
5100  InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5101  MOReg->getSubReg(), ARM::ssub_0));
5102  // rZ
5103  MOReg = &MI.getOperand(2);
5104  if (!MOReg->isUndef())
5105  InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5106  MOReg->getSubReg(), ARM::ssub_1));
5107  return true;
5108  }
5109  llvm_unreachable("Target dependent opcode missing");
5110 }
5111 
5112 bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5113  const MachineInstr &MI, unsigned DefIdx,
5114  RegSubRegPairAndIdx &InputReg) const {
5115  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5116  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5117 
5118  switch (MI.getOpcode()) {
5119  case ARM::VMOVRRD:
5120  // rX, rY = VMOVRRD dZ
5121  // is the same as:
5122  // rX = EXTRACT_SUBREG dZ, ssub_0
5123  // rY = EXTRACT_SUBREG dZ, ssub_1
5124  const MachineOperand &MOReg = MI.getOperand(2);
5125  if (MOReg.isUndef())
5126  return false;
5127  InputReg.Reg = MOReg.getReg();
5128  InputReg.SubReg = MOReg.getSubReg();
5129  InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5130  return true;
5131  }
5132  llvm_unreachable("Target dependent opcode missing");
5133 }
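// For example, for "rX, rY = VMOVRRD dZ", DefIdx == 0 reports (dZ, ssub_0)
// for rX, and DefIdx == 1 reports (dZ, ssub_1) for rY.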
5134 
5135 bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5136  const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5137  RegSubRegPairAndIdx &InsertedReg) const {
5138  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5139  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5140 
5141  switch (MI.getOpcode()) {
5142  case ARM::VSETLNi32:
5143  // dX = VSETLNi32 dY, rZ, imm
5144  const MachineOperand &MOBaseReg = MI.getOperand(1);
5145  const MachineOperand &MOInsertedReg = MI.getOperand(2);
5146  if (MOInsertedReg.isUndef())
5147  return false;
5148  const MachineOperand &MOIndex = MI.getOperand(3);
5149  BaseReg.Reg = MOBaseReg.getReg();
5150  BaseReg.SubReg = MOBaseReg.getSubReg();
5151 
5152  InsertedReg.Reg = MOInsertedReg.getReg();
5153  InsertedReg.SubReg = MOInsertedReg.getSubReg();
5154  InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
5155  return true;
5156  }
5157  llvm_unreachable("Target dependent opcode missing");
5158 }
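// For example, "dX = VSETLNi32 dY, rZ, 1" is reported as the equivalent of
// "dX = INSERT_SUBREG dY, rZ, ssub_1", with dY as the base register and rZ
// as the inserted value.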
5159 
5160 std::pair<unsigned, unsigned>
5161 ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5162  const unsigned Mask = ARMII::MO_OPTION_MASK;
5163  return std::make_pair(TF & Mask, TF & ~Mask);
5164 }
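// For example, a flag word combining MO_HI16 with MO_SECREL decomposes into
// the direct component MO_HI16 (selected by MO_OPTION_MASK) and the bitmask
// component MO_SECREL, matching the two serializable-flag tables below.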
5165 
5166 ArrayRef<std::pair<unsigned, const char *>>
5167 ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5168  using namespace ARMII;
5169 
5170  static const std::pair<unsigned, const char *> TargetFlags[] = {
5171  {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
5172  return makeArrayRef(TargetFlags);
5173 }
5174 
5175 ArrayRef<std::pair<unsigned, const char *>>
5176 ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5177  using namespace ARMII;
5178 
5179  static const std::pair<unsigned, const char *> TargetFlags[] = {
5180  {MO_COFFSTUB, "arm-coffstub"},
5181  {MO_GOT, "arm-got"},
5182  {MO_SBREL, "arm-sbrel"},
5183  {MO_DLLIMPORT, "arm-dllimport"},
5184  {MO_SECREL, "arm-secrel"},
5185  {MO_NONLAZY, "arm-nonlazy"}};
5186  return makeArrayRef(TargetFlags);
5187 }