1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "Utils/AArch64BaseInfo.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineMemOperand.h"
28 #include "llvm/CodeGen/MachineModuleInfo.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/StackMaps.h"
32 #include "llvm/CodeGen/TargetRegisterInfo.h"
33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCInst.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CodeGen.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MathExtras.h"
44 #include "llvm/Target/TargetMachine.h"
45 #include "llvm/Target/TargetOptions.h"
46 #include <cassert>
47 #include <cstdint>
48 #include <iterator>
49 #include <utility>
50 
51 using namespace llvm;
52 
53 #define GET_INSTRINFO_CTOR_DTOR
54 #include "AArch64GenInstrInfo.inc"
55 
56 static cl::opt<unsigned> TBZDisplacementBits(
57  "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
58  cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
59 
60 static cl::opt<unsigned> CBZDisplacementBits(
61  "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
62  cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
63 
64 static cl::opt<unsigned>
65  BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
66  cl::desc("Restrict range of Bcc instructions (DEBUG)"));
67 
68 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
69  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
70  RI(STI.getTargetTriple()), Subtarget(STI) {}
71 
72 /// GetInstSize - Return the number of bytes of code the specified
73 /// instruction may be. This returns the maximum number of bytes.
74 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
75  const MachineBasicBlock &MBB = *MI.getParent();
76  const MachineFunction *MF = MBB.getParent();
77  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
78 
79  if (MI.getOpcode() == AArch64::INLINEASM)
80  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
81 
82  // FIXME: We currently only handle pseudoinstructions that don't get expanded
83  // before the assembly printer.
84  unsigned NumBytes = 0;
85  const MCInstrDesc &Desc = MI.getDesc();
86  switch (Desc.getOpcode()) {
87  default:
88  // Anything not explicitly designated otherwise is a normal 4-byte insn.
89  NumBytes = 4;
90  break;
91  case TargetOpcode::DBG_VALUE:
92  case TargetOpcode::EH_LABEL:
93  case TargetOpcode::IMPLICIT_DEF:
94  case TargetOpcode::KILL:
95  NumBytes = 0;
96  break;
97  case TargetOpcode::STACKMAP:
98  // The upper bound for a stackmap intrinsic is the full length of its shadow
99  NumBytes = StackMapOpers(&MI).getNumPatchBytes();
100  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
101  break;
102  case TargetOpcode::PATCHPOINT:
103  // The size of the patchpoint intrinsic is the number of bytes requested
104  NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
105  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
106  break;
107  case AArch64::TLSDESC_CALLSEQ:
108  // This gets lowered to an instruction sequence which takes 16 bytes
109  NumBytes = 16;
110  break;
111  }
112 
113  return NumBytes;
114 }
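// For illustration: every real AArch64 instruction is a fixed 4 bytes, so only
// pseudos need special handling above. A STACKMAP whose shadow was requested as
// 16 bytes reports 16, while an ordinary ADDXri reports 4.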
115 
116 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
117  SmallVectorImpl<MachineOperand> &Cond) {
118  // Block ends with fall-through condbranch.
119  switch (LastInst->getOpcode()) {
120  default:
121  llvm_unreachable("Unknown branch instruction?");
122  case AArch64::Bcc:
123  Target = LastInst->getOperand(1).getMBB();
124  Cond.push_back(LastInst->getOperand(0));
125  break;
126  case AArch64::CBZW:
127  case AArch64::CBZX:
128  case AArch64::CBNZW:
129  case AArch64::CBNZX:
130  Target = LastInst->getOperand(1).getMBB();
131  Cond.push_back(MachineOperand::CreateImm(-1));
132  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
133  Cond.push_back(LastInst->getOperand(0));
134  break;
135  case AArch64::TBZW:
136  case AArch64::TBZX:
137  case AArch64::TBNZW:
138  case AArch64::TBNZX:
139  Target = LastInst->getOperand(2).getMBB();
140  Cond.push_back(MachineOperand::CreateImm(-1));
141  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
142  Cond.push_back(LastInst->getOperand(0));
143  Cond.push_back(LastInst->getOperand(1));
144  }
145 }
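// For illustration, the Cond vectors produced above (register and bit operands
// are examples, not taken from the original source):
//   b.lt <bb>          -> Cond = { AArch64CC::LT }
//   cbnz w8, <bb>      -> Cond = { -1, AArch64::CBNZW, w8 }
//   tbz  x3, #63, <bb> -> Cond = { -1, AArch64::TBZX, x3, 63 }
// The leading -1 marks a folded compare-and-branch; see the Cond[0] checks in
// reverseBranchCondition() and instantiateCondBranch() below.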
146 
147 static unsigned getBranchDisplacementBits(unsigned Opc) {
148  switch (Opc) {
149  default:
150  llvm_unreachable("unexpected opcode!");
151  case AArch64::B:
152  return 64;
153  case AArch64::TBNZW:
154  case AArch64::TBZW:
155  case AArch64::TBNZX:
156  case AArch64::TBZX:
157  return TBZDisplacementBits;
158  case AArch64::CBNZW:
159  case AArch64::CBZW:
160  case AArch64::CBNZX:
161  case AArch64::CBZX:
162  return CBZDisplacementBits;
163  case AArch64::Bcc:
164  return BCCDisplacementBits;
165  }
166 }
167 
168 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
169  int64_t BrOffset) const {
170  unsigned Bits = getBranchDisplacementBits(BranchOp);
171  assert(Bits >= 3 && "max branch displacement must be enough to jump "
172  "over conditional branch expansion");
173  return isIntN(Bits, BrOffset / 4);
174 }
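// For illustration: the displacement bits above encode a signed offset in
// 4-byte words (hence the BrOffset / 4), so with the default cl::opt values
//   TB[N]Z : 14 bits -> +/- 32 KiB
//   CB[N]Z : 19 bits -> +/- 1 MiB
//   B.cc   : 19 bits -> +/- 1 MiB
// while an unconditional B is returned as 64 bits, i.e. treated as always in
// range at this stage (the architectural B range is 26 bits, +/- 128 MiB).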
175 
176 MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
177  const MachineInstr &MI) const {
178  switch (MI.getOpcode()) {
179  default:
180  llvm_unreachable("unexpected opcode!");
181  case AArch64::B:
182  return MI.getOperand(0).getMBB();
183  case AArch64::TBZW:
184  case AArch64::TBNZW:
185  case AArch64::TBZX:
186  case AArch64::TBNZX:
187  return MI.getOperand(2).getMBB();
188  case AArch64::CBZW:
189  case AArch64::CBNZW:
190  case AArch64::CBZX:
191  case AArch64::CBNZX:
192  case AArch64::Bcc:
193  return MI.getOperand(1).getMBB();
194  }
195 }
196 
197 // Branch analysis.
198 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
199  MachineBasicBlock *&TBB,
200  MachineBasicBlock *&FBB,
201  SmallVectorImpl<MachineOperand> &Cond,
202  bool AllowModify) const {
203  // If the block has no terminators, it just falls into the block after it.
204  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
205  if (I == MBB.end())
206  return false;
207 
208  if (!isUnpredicatedTerminator(*I))
209  return false;
210 
211  // Get the last instruction in the block.
212  MachineInstr *LastInst = &*I;
213 
214  // If there is only one terminator instruction, process it.
215  unsigned LastOpc = LastInst->getOpcode();
216  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
217  if (isUncondBranchOpcode(LastOpc)) {
218  TBB = LastInst->getOperand(0).getMBB();
219  return false;
220  }
221  if (isCondBranchOpcode(LastOpc)) {
222  // Block ends with fall-through condbranch.
223  parseCondBranch(LastInst, TBB, Cond);
224  return false;
225  }
226  return true; // Can't handle indirect branch.
227  }
228 
229  // Get the instruction before it if it is a terminator.
230  MachineInstr *SecondLastInst = &*I;
231  unsigned SecondLastOpc = SecondLastInst->getOpcode();
232 
233  // If AllowModify is true and the block ends with two or more unconditional
234  // branches, delete all but the first unconditional branch.
235  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
236  while (isUncondBranchOpcode(SecondLastOpc)) {
237  LastInst->eraseFromParent();
238  LastInst = SecondLastInst;
239  LastOpc = LastInst->getOpcode();
240  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
241  // Return now; the only terminator is an unconditional branch.
242  TBB = LastInst->getOperand(0).getMBB();
243  return false;
244  } else {
245  SecondLastInst = &*I;
246  SecondLastOpc = SecondLastInst->getOpcode();
247  }
248  }
249  }
250 
251  // If there are three terminators, we don't know what sort of block this is.
252  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
253  return true;
254 
255  // If the block ends with a B and a Bcc, handle it.
256  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
257  parseCondBranch(SecondLastInst, TBB, Cond);
258  FBB = LastInst->getOperand(0).getMBB();
259  return false;
260  }
261 
262  // If the block ends with two unconditional branches, handle it. The second
263  // one is not executed, so remove it.
264  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
265  TBB = SecondLastInst->getOperand(0).getMBB();
266  I = LastInst;
267  if (AllowModify)
268  I->eraseFromParent();
269  return false;
270  }
271 
272  // ...likewise if it ends with an indirect branch followed by an unconditional
273  // branch.
274  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
275  I = LastInst;
276  if (AllowModify)
277  I->eraseFromParent();
278  return true;
279  }
280 
281  // Otherwise, can't handle this.
282  return true;
283 }
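// For illustration, the block shapes analyzeBranch() accepts (block labels are
// made up): a lone "b %bb.2" sets TBB only; "cbz w0, %bb.3" falling through
// sets TBB and Cond; "b.eq %bb.3; b %bb.4" sets TBB, Cond and FBB. Indirect
// branches and blocks with three terminators make it return true (unknown).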
284 
285 bool AArch64InstrInfo::reverseBranchCondition(
286  SmallVectorImpl<MachineOperand> &Cond) const {
287  if (Cond[0].getImm() != -1) {
288  // Regular Bcc
289  AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
290  Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
291  } else {
292  // Folded compare-and-branch
293  switch (Cond[1].getImm()) {
294  default:
295  llvm_unreachable("Unknown conditional branch!");
296  case AArch64::CBZW:
297  Cond[1].setImm(AArch64::CBNZW);
298  break;
299  case AArch64::CBNZW:
300  Cond[1].setImm(AArch64::CBZW);
301  break;
302  case AArch64::CBZX:
303  Cond[1].setImm(AArch64::CBNZX);
304  break;
305  case AArch64::CBNZX:
306  Cond[1].setImm(AArch64::CBZX);
307  break;
308  case AArch64::TBZW:
309  Cond[1].setImm(AArch64::TBNZW);
310  break;
311  case AArch64::TBNZW:
312  Cond[1].setImm(AArch64::TBZW);
313  break;
314  case AArch64::TBZX:
315  Cond[1].setImm(AArch64::TBNZX);
316  break;
317  case AArch64::TBNZX:
318  Cond[1].setImm(AArch64::TBZX);
319  break;
320  }
321  }
322 
323  return false;
324 }
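// For illustration: reversing { AArch64CC::LT } yields { AArch64CC::GE }, and
// reversing the folded form { -1, AArch64::CBZW, w0 } simply flips the opcode
// to { -1, AArch64::CBNZW, w0 }; the register and bit operands are untouched.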
325 
326 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
327  int *BytesRemoved) const {
328  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
329  if (I == MBB.end())
330  return 0;
331 
332  if (!isUncondBranchOpcode(I->getOpcode()) &&
333  !isCondBranchOpcode(I->getOpcode()))
334  return 0;
335 
336  // Remove the branch.
337  I->eraseFromParent();
338 
339  I = MBB.end();
340 
341  if (I == MBB.begin()) {
342  if (BytesRemoved)
343  *BytesRemoved = 4;
344  return 1;
345  }
346  --I;
347  if (!isCondBranchOpcode(I->getOpcode())) {
348  if (BytesRemoved)
349  *BytesRemoved = 4;
350  return 1;
351  }
352 
353  // Remove the branch.
354  I->eraseFromParent();
355  if (BytesRemoved)
356  *BytesRemoved = 8;
357 
358  return 2;
359 }
360 
361 void AArch64InstrInfo::instantiateCondBranch(
362  MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
363  ArrayRef<MachineOperand> Cond) const {
364  if (Cond[0].getImm() != -1) {
365  // Regular Bcc
366  BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
367  } else {
368  // Folded compare-and-branch
369  // Note that we use addOperand instead of addReg to keep the flags.
370  const MachineInstrBuilder MIB =
371  BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
372  if (Cond.size() > 3)
373  MIB.addImm(Cond[3].getImm());
374  MIB.addMBB(TBB);
375  }
376 }
377 
378 unsigned AArch64InstrInfo::insertBranch(
379  MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
380  ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
381  // Shouldn't be a fall through.
382  assert(TBB && "insertBranch must not be told to insert a fallthrough");
383 
384  if (!FBB) {
385  if (Cond.empty()) // Unconditional branch?
386  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
387  else
388  instantiateCondBranch(MBB, DL, TBB, Cond);
389 
390  if (BytesAdded)
391  *BytesAdded = 4;
392 
393  return 1;
394  }
395 
396  // Two-way conditional branch.
397  instantiateCondBranch(MBB, DL, TBB, Cond);
398  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
399 
400  if (BytesAdded)
401  *BytesAdded = 8;
402 
403  return 2;
404 }
405 
406 // Find the original register that VReg is copied from.
407 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
408  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
409  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
410  if (!DefMI->isFullCopy())
411  return VReg;
412  VReg = DefMI->getOperand(1).getReg();
413  }
414  return VReg;
415 }
416 
417 // Determine if VReg is defined by an instruction that can be folded into a
418 // csel instruction. If so, return the folded opcode, and the replacement
419 // register.
420 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
421  unsigned *NewVReg = nullptr) {
422  VReg = removeCopies(MRI, VReg);
423  if (!TargetRegisterInfo::isVirtualRegister(VReg))
424  return 0;
425 
426  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
427  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
428  unsigned Opc = 0;
429  unsigned SrcOpNum = 0;
430  switch (DefMI->getOpcode()) {
431  case AArch64::ADDSXri:
432  case AArch64::ADDSWri:
433  // if NZCV is used, do not fold.
434  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
435  return 0;
436  // fall-through to ADDXri and ADDWri.
437  LLVM_FALLTHROUGH;
438  case AArch64::ADDXri:
439  case AArch64::ADDWri:
440  // add x, 1 -> csinc.
441  if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
442  DefMI->getOperand(3).getImm() != 0)
443  return 0;
444  SrcOpNum = 1;
445  Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
446  break;
447 
448  case AArch64::ORNXrr:
449  case AArch64::ORNWrr: {
450  // not x -> csinv, represented as orn dst, xzr, src.
451  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
452  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
453  return 0;
454  SrcOpNum = 2;
455  Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
456  break;
457  }
458 
459  case AArch64::SUBSXrr:
460  case AArch64::SUBSWrr:
461  // if NZCV is used, do not fold.
462  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
463  return 0;
464  // fall-through to SUBXrr and SUBWrr.
465  LLVM_FALLTHROUGH;
466  case AArch64::SUBXrr:
467  case AArch64::SUBWrr: {
468  // neg x -> csneg, represented as sub dst, xzr, src.
469  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
470  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
471  return 0;
472  SrcOpNum = 2;
473  Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
474  break;
475  }
476  default:
477  return 0;
478  }
479  assert(Opc && SrcOpNum && "Missing parameters");
480 
481  if (NewVReg)
482  *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
483  return Opc;
484 }
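// For illustration (virtual register names are made up): if %t is defined by
//   %t = ADDWri %a, 1, 0        ; %t = %a + 1
// and feeds the true side of a select, canFoldIntoCSel() returns CSINCWr with
// *NewVReg = %a, and insertSelect() below inverts the condition so the final
//   %d = CSINCWr %false, %a, !cc    ; %d = !cc ? %false : %a + 1
// computes the same value as the original "cc ? %t : %false" select.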
485 
486 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
487  ArrayRef<MachineOperand> Cond,
488  unsigned TrueReg, unsigned FalseReg,
489  int &CondCycles, int &TrueCycles,
490  int &FalseCycles) const {
491  // Check register classes.
492  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
493  const TargetRegisterClass *RC =
494  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
495  if (!RC)
496  return false;
497 
498  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
499  unsigned ExtraCondLat = Cond.size() != 1;
500 
501  // GPRs are handled by csel.
502  // FIXME: Fold in x+1, -x, and ~x when applicable.
503  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
504  AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
505  // Single-cycle csel, csinc, csinv, and csneg.
506  CondCycles = 1 + ExtraCondLat;
507  TrueCycles = FalseCycles = 1;
508  if (canFoldIntoCSel(MRI, TrueReg))
509  TrueCycles = 0;
510  else if (canFoldIntoCSel(MRI, FalseReg))
511  FalseCycles = 0;
512  return true;
513  }
514 
515  // Scalar floating point is handled by fcsel.
516  // FIXME: Form fabs, fmin, and fmax when applicable.
517  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
518  AArch64::FPR32RegClass.hasSubClassEq(RC)) {
519  CondCycles = 5 + ExtraCondLat;
520  TrueCycles = FalseCycles = 2;
521  return true;
522  }
523 
524  // Can't do vectors.
525  return false;
526 }
527 
528 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
529  MachineBasicBlock::iterator I,
530  const DebugLoc &DL, unsigned DstReg,
531  ArrayRef<MachineOperand> Cond,
532  unsigned TrueReg, unsigned FalseReg) const {
533  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
534 
535  // Parse the condition code, see parseCondBranch() above.
536  AArch64CC::CondCode CC;
537  switch (Cond.size()) {
538  default:
539  llvm_unreachable("Unknown condition opcode in Cond");
540  case 1: // b.cc
541  CC = AArch64CC::CondCode(Cond[0].getImm());
542  break;
543  case 3: { // cbz/cbnz
544  // We must insert a compare against 0.
545  bool Is64Bit;
546  switch (Cond[1].getImm()) {
547  default:
548  llvm_unreachable("Unknown branch opcode in Cond");
549  case AArch64::CBZW:
550  Is64Bit = false;
551  CC = AArch64CC::EQ;
552  break;
553  case AArch64::CBZX:
554  Is64Bit = true;
555  CC = AArch64CC::EQ;
556  break;
557  case AArch64::CBNZW:
558  Is64Bit = false;
559  CC = AArch64CC::NE;
560  break;
561  case AArch64::CBNZX:
562  Is64Bit = true;
563  CC = AArch64CC::NE;
564  break;
565  }
566  unsigned SrcReg = Cond[2].getReg();
567  if (Is64Bit) {
568  // cmp reg, #0 is actually subs xzr, reg, #0.
569  MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
570  BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
571  .addReg(SrcReg)
572  .addImm(0)
573  .addImm(0);
574  } else {
575  MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
576  BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
577  .addReg(SrcReg)
578  .addImm(0)
579  .addImm(0);
580  }
581  break;
582  }
583  case 4: { // tbz/tbnz
584  // We must insert a tst instruction.
585  switch (Cond[1].getImm()) {
586  default:
587  llvm_unreachable("Unknown branch opcode in Cond");
588  case AArch64::TBZW:
589  case AArch64::TBZX:
590  CC = AArch64CC::EQ;
591  break;
592  case AArch64::TBNZW:
593  case AArch64::TBNZX:
594  CC = AArch64CC::NE;
595  break;
596  }
597  // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
598  if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
599  BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
600  .addReg(Cond[2].getReg())
601  .addImm(
602  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
603  else
604  BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
605  .addReg(Cond[2].getReg())
606  .addImm(
607  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
608  break;
609  }
610  }
611 
612  unsigned Opc = 0;
613  const TargetRegisterClass *RC = nullptr;
614  bool TryFold = false;
615  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
616  RC = &AArch64::GPR64RegClass;
617  Opc = AArch64::CSELXr;
618  TryFold = true;
619  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
620  RC = &AArch64::GPR32RegClass;
621  Opc = AArch64::CSELWr;
622  TryFold = true;
623  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
624  RC = &AArch64::FPR64RegClass;
625  Opc = AArch64::FCSELDrrr;
626  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
627  RC = &AArch64::FPR32RegClass;
628  Opc = AArch64::FCSELSrrr;
629  }
630  assert(RC && "Unsupported regclass");
631 
632  // Try folding simple instructions into the csel.
633  if (TryFold) {
634  unsigned NewVReg = 0;
635  unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
636  if (FoldedOpc) {
637  // The folded opcodes csinc, csinv and csneg apply the operation to
638  // FalseReg, so we need to invert the condition.
639  CC = AArch64CC::getInvertedCondCode(CC);
640  TrueReg = FalseReg;
641  } else
642  FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
643 
644  // Fold the operation. Leave any dead instructions for DCE to clean up.
645  if (FoldedOpc) {
646  FalseReg = NewVReg;
647  Opc = FoldedOpc;
648  // This extends the live range of NewVReg.
649  MRI.clearKillFlags(NewVReg);
650  }
651  }
652 
653  // Pull all virtual registers into the appropriate class.
654  MRI.constrainRegClass(TrueReg, RC);
655  MRI.constrainRegClass(FalseReg, RC);
656 
657  // Insert the csel.
658  BuildMI(MBB, I, DL, get(Opc), DstReg)
659  .addReg(TrueReg)
660  .addReg(FalseReg)
661  .addImm(CC);
662 }
663 
664 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
665 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
666  uint64_t Imm = MI.getOperand(1).getImm();
667  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
668  uint64_t Encoding;
669  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
670 }
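// For illustration: 0xffff00ff is a rotated run of 24 ones within a 32-bit
// element, so a MOVi32imm of that value passes processLogicalImmediate() and
// can be emitted as a single ORRWri from WZR; 0x12345678 (and the all-zero /
// all-one patterns, which logical immediates cannot encode) fail the check.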
671 
672 // FIXME: this implementation should be micro-architecture dependent, so a
673 // micro-architecture target hook should be introduced here in future.
674 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
675  if (!Subtarget.hasCustomCheapAsMoveHandling())
676  return MI.isAsCheapAsAMove();
677 
678  const unsigned Opcode = MI.getOpcode();
679 
680  // Firstly, check cases gated by features.
681 
682  if (Subtarget.hasZeroCycleZeroingFP()) {
683  if (Opcode == AArch64::FMOVH0 ||
684  Opcode == AArch64::FMOVS0 ||
685  Opcode == AArch64::FMOVD0)
686  return true;
687  }
688 
689  if (Subtarget.hasZeroCycleZeroingGP()) {
690  if (Opcode == TargetOpcode::COPY &&
691  (MI.getOperand(1).getReg() == AArch64::WZR ||
692  MI.getOperand(1).getReg() == AArch64::XZR))
693  return true;
694  }
695 
696  // Secondly, check cases specific to sub-targets.
697 
698  if (Subtarget.hasExynosCheapAsMoveHandling()) {
699  if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
700  return true;
701  else
702  return MI.isAsCheapAsAMove();
703  }
704 
705  // Finally, check generic cases.
706 
707  switch (Opcode) {
708  default:
709  return false;
710 
711  // add/sub on register without shift
712  case AArch64::ADDWri:
713  case AArch64::ADDXri:
714  case AArch64::SUBWri:
715  case AArch64::SUBXri:
716  return (MI.getOperand(3).getImm() == 0);
717 
718  // logical ops on immediate
719  case AArch64::ANDWri:
720  case AArch64::ANDXri:
721  case AArch64::EORWri:
722  case AArch64::EORXri:
723  case AArch64::ORRWri:
724  case AArch64::ORRXri:
725  return true;
726 
727  // logical ops on register without shift
728  case AArch64::ANDWrr:
729  case AArch64::ANDXrr:
730  case AArch64::BICWrr:
731  case AArch64::BICXrr:
732  case AArch64::EONWrr:
733  case AArch64::EONXrr:
734  case AArch64::EORWrr:
735  case AArch64::EORXrr:
736  case AArch64::ORNWrr:
737  case AArch64::ORNXrr:
738  case AArch64::ORRWrr:
739  case AArch64::ORRXrr:
740  return true;
741 
742  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
743  // ORRXri, it is as cheap as MOV
744  case AArch64::MOVi32imm:
745  return canBeExpandedToORR(MI, 32);
746  case AArch64::MOVi64imm:
747  return canBeExpandedToORR(MI, 64);
748  }
749 
750  llvm_unreachable("Unknown opcode to check as cheap as a move!");
751 }
752 
753 bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
754  unsigned Reg, Imm, Shift;
755 
756  switch (MI.getOpcode()) {
757  default:
758  return false;
759 
760  // MOV Rd, SP
761  case AArch64::ADDWri:
762  case AArch64::ADDXri:
763  if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
764  return false;
765 
766  Reg = MI.getOperand(1).getReg();
767  Imm = MI.getOperand(2).getImm();
768  return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
769 
770  // Literal
771  case AArch64::ADR:
772  case AArch64::ADRP:
773  return true;
774 
775  // MOVI Vd, #0
776  case AArch64::MOVID:
777  case AArch64::MOVIv8b_ns:
778  case AArch64::MOVIv2d_ns:
779  case AArch64::MOVIv16b_ns:
780  Imm = MI.getOperand(1).getImm();
781  return (Imm == 0);
782 
783  // MOVI Vd, #0
784  case AArch64::MOVIv2i32:
785  case AArch64::MOVIv4i16:
786  case AArch64::MOVIv4i32:
787  case AArch64::MOVIv8i16:
788  Imm = MI.getOperand(1).getImm();
789  Shift = MI.getOperand(2).getImm();
790  return (Imm == 0 && Shift == 0);
791 
792  // MOV Rd, Imm
793  case AArch64::MOVNWi:
794  case AArch64::MOVNXi:
795 
796  // MOV Rd, Imm
797  case AArch64::MOVZWi:
798  case AArch64::MOVZXi:
799  return true;
800 
801  // MOV Rd, Imm
802  case AArch64::ORRWri:
803  case AArch64::ORRXri:
804  if (!MI.getOperand(1).isReg())
805  return false;
806 
807  Reg = MI.getOperand(1).getReg();
808  Imm = MI.getOperand(2).getImm();
809  return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
810 
811  // MOV Rd, Rm
812  case AArch64::ORRWrs:
813  case AArch64::ORRXrs:
814  if (!MI.getOperand(1).isReg())
815  return false;
816 
817  Reg = MI.getOperand(1).getReg();
818  Imm = MI.getOperand(3).getImm();
819  Shift = AArch64_AM::getShiftValue(Imm);
820  return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
821  }
822 }
823 
824 bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
825  unsigned Imm, Shift;
826  AArch64_AM::ShiftExtendType Ext;
827 
828  switch (MI.getOpcode()) {
829  default:
830  return false;
831 
832  // WriteI
833  case AArch64::ADDSWri:
834  case AArch64::ADDSXri:
835  case AArch64::ADDWri:
836  case AArch64::ADDXri:
837  case AArch64::SUBSWri:
838  case AArch64::SUBSXri:
839  case AArch64::SUBWri:
840  case AArch64::SUBXri:
841  return true;
842 
843  // WriteISReg
844  case AArch64::ADDSWrs:
845  case AArch64::ADDSXrs:
846  case AArch64::ADDWrs:
847  case AArch64::ADDXrs:
848  case AArch64::ANDSWrs:
849  case AArch64::ANDSXrs:
850  case AArch64::ANDWrs:
851  case AArch64::ANDXrs:
852  case AArch64::BICSWrs:
853  case AArch64::BICSXrs:
854  case AArch64::BICWrs:
855  case AArch64::BICXrs:
856  case AArch64::EONWrs:
857  case AArch64::EONXrs:
858  case AArch64::EORWrs:
859  case AArch64::EORXrs:
860  case AArch64::ORNWrs:
861  case AArch64::ORNXrs:
862  case AArch64::ORRWrs:
863  case AArch64::ORRXrs:
864  case AArch64::SUBSWrs:
865  case AArch64::SUBSXrs:
866  case AArch64::SUBWrs:
867  case AArch64::SUBXrs:
868  Imm = MI.getOperand(3).getImm();
869  Shift = AArch64_AM::getShiftValue(Imm);
870  Ext = AArch64_AM::getShiftType(Imm);
871  return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
872 
873  // WriteIEReg
874  case AArch64::ADDSWrx:
875  case AArch64::ADDSXrx:
876  case AArch64::ADDSXrx64:
877  case AArch64::ADDWrx:
878  case AArch64::ADDXrx:
879  case AArch64::ADDXrx64:
880  case AArch64::SUBSWrx:
881  case AArch64::SUBSXrx:
882  case AArch64::SUBSXrx64:
883  case AArch64::SUBWrx:
884  case AArch64::SUBXrx:
885  case AArch64::SUBXrx64:
886  Imm = MI.getOperand(3).getImm();
887  Shift = AArch64_AM::getArithShiftValue(Imm);
888  Ext = AArch64_AM::getArithExtendType(Imm);
889  return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
890 
891  case AArch64::PRFMroW:
892  case AArch64::PRFMroX:
893 
894  // WriteLDIdx
895  case AArch64::LDRBBroW:
896  case AArch64::LDRBBroX:
897  case AArch64::LDRHHroW:
898  case AArch64::LDRHHroX:
899  case AArch64::LDRSBWroW:
900  case AArch64::LDRSBWroX:
901  case AArch64::LDRSBXroW:
902  case AArch64::LDRSBXroX:
903  case AArch64::LDRSHWroW:
904  case AArch64::LDRSHWroX:
905  case AArch64::LDRSHXroW:
906  case AArch64::LDRSHXroX:
907  case AArch64::LDRSWroW:
908  case AArch64::LDRSWroX:
909  case AArch64::LDRWroW:
910  case AArch64::LDRWroX:
911  case AArch64::LDRXroW:
912  case AArch64::LDRXroX:
913 
914  case AArch64::LDRBroW:
915  case AArch64::LDRBroX:
916  case AArch64::LDRDroW:
917  case AArch64::LDRDroX:
918  case AArch64::LDRHroW:
919  case AArch64::LDRHroX:
920  case AArch64::LDRSroW:
921  case AArch64::LDRSroX:
922 
923  // WriteSTIdx
924  case AArch64::STRBBroW:
925  case AArch64::STRBBroX:
926  case AArch64::STRHHroW:
927  case AArch64::STRHHroX:
928  case AArch64::STRWroW:
929  case AArch64::STRWroX:
930  case AArch64::STRXroW:
931  case AArch64::STRXroX:
932 
933  case AArch64::STRBroW:
934  case AArch64::STRBroX:
935  case AArch64::STRDroW:
936  case AArch64::STRDroX:
937  case AArch64::STRHroW:
938  case AArch64::STRHroX:
939  case AArch64::STRSroW:
940  case AArch64::STRSroX:
941  Imm = MI.getOperand(3).getImm();
942  Ext = AArch64_AM::getMemExtendType(Imm);
943  return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
944  }
945 }
946 
947 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
948  switch (MI.getOpcode()) {
949  default:
950  return false;
951 
952  case AArch64::ADDWrs:
953  case AArch64::ADDXrs:
954  case AArch64::ADDSWrs:
955  case AArch64::ADDSXrs: {
956  unsigned Imm = MI.getOperand(3).getImm();
957  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
958  if (ShiftVal == 0)
959  return true;
960  return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
961  }
962 
963  case AArch64::ADDWrx:
964  case AArch64::ADDXrx:
965  case AArch64::ADDXrx64:
966  case AArch64::ADDSWrx:
967  case AArch64::ADDSXrx:
968  case AArch64::ADDSXrx64: {
969  unsigned Imm = MI.getOperand(3).getImm();
970  switch (AArch64_AM::getArithExtendType(Imm)) {
971  default:
972  return false;
973  case AArch64_AM::UXTB:
974  case AArch64_AM::UXTH:
975  case AArch64_AM::UXTW:
976  case AArch64_AM::UXTX:
977  return AArch64_AM::getArithShiftValue(Imm) <= 4;
978  }
979  }
980 
981  case AArch64::SUBWrs:
982  case AArch64::SUBSWrs: {
983  unsigned Imm = MI.getOperand(3).getImm();
984  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
985  return ShiftVal == 0 ||
986  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
987  }
988 
989  case AArch64::SUBXrs:
990  case AArch64::SUBSXrs: {
991  unsigned Imm = MI.getOperand(3).getImm();
992  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
993  return ShiftVal == 0 ||
994  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
995  }
996 
997  case AArch64::SUBWrx:
998  case AArch64::SUBXrx:
999  case AArch64::SUBXrx64:
1000  case AArch64::SUBSWrx:
1001  case AArch64::SUBSXrx:
1002  case AArch64::SUBSXrx64: {
1003  unsigned Imm = MI.getOperand(3).getImm();
1004  switch (AArch64_AM::getArithExtendType(Imm)) {
1005  default:
1006  return false;
1007  case AArch64_AM::UXTB:
1008  case AArch64_AM::UXTH:
1009  case AArch64_AM::UXTW:
1010  case AArch64_AM::UXTX:
1011  return AArch64_AM::getArithShiftValue(Imm) == 0;
1012  }
1013  }
1014 
1015  case AArch64::LDRBBroW:
1016  case AArch64::LDRBBroX:
1017  case AArch64::LDRBroW:
1018  case AArch64::LDRBroX:
1019  case AArch64::LDRDroW:
1020  case AArch64::LDRDroX:
1021  case AArch64::LDRHHroW:
1022  case AArch64::LDRHHroX:
1023  case AArch64::LDRHroW:
1024  case AArch64::LDRHroX:
1025  case AArch64::LDRQroW:
1026  case AArch64::LDRQroX:
1027  case AArch64::LDRSBWroW:
1028  case AArch64::LDRSBWroX:
1029  case AArch64::LDRSBXroW:
1030  case AArch64::LDRSBXroX:
1031  case AArch64::LDRSHWroW:
1032  case AArch64::LDRSHWroX:
1033  case AArch64::LDRSHXroW:
1034  case AArch64::LDRSHXroX:
1035  case AArch64::LDRSWroW:
1036  case AArch64::LDRSWroX:
1037  case AArch64::LDRSroW:
1038  case AArch64::LDRSroX:
1039  case AArch64::LDRWroW:
1040  case AArch64::LDRWroX:
1041  case AArch64::LDRXroW:
1042  case AArch64::LDRXroX:
1043  case AArch64::PRFMroW:
1044  case AArch64::PRFMroX:
1045  case AArch64::STRBBroW:
1046  case AArch64::STRBBroX:
1047  case AArch64::STRBroW:
1048  case AArch64::STRBroX:
1049  case AArch64::STRDroW:
1050  case AArch64::STRDroX:
1051  case AArch64::STRHHroW:
1052  case AArch64::STRHHroX:
1053  case AArch64::STRHroW:
1054  case AArch64::STRHroX:
1055  case AArch64::STRQroW:
1056  case AArch64::STRQroX:
1057  case AArch64::STRSroW:
1058  case AArch64::STRSroX:
1059  case AArch64::STRWroW:
1060  case AArch64::STRWroX:
1061  case AArch64::STRXroW:
1062  case AArch64::STRXroX: {
1063  unsigned IsSigned = MI.getOperand(3).getImm();
1064  return !IsSigned;
1065  }
1066  }
1067 }
1068 
1069 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1070  unsigned &SrcReg, unsigned &DstReg,
1071  unsigned &SubIdx) const {
1072  switch (MI.getOpcode()) {
1073  default:
1074  return false;
1075  case AArch64::SBFMXri: // aka sxtw
1076  case AArch64::UBFMXri: // aka uxtw
1077  // Check for the 32 -> 64 bit extension case, these instructions can do
1078  // much more.
1079  if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1080  return false;
1081  // This is a signed or unsigned 32 -> 64 bit extension.
1082  SrcReg = MI.getOperand(1).getReg();
1083  DstReg = MI.getOperand(0).getReg();
1084  SubIdx = AArch64::sub_32;
1085  return true;
1086  }
1087 }
1088 
1089 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1090  MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
1091  const TargetRegisterInfo *TRI = &getRegisterInfo();
1092  unsigned BaseRegA = 0, BaseRegB = 0;
1093  int64_t OffsetA = 0, OffsetB = 0;
1094  unsigned WidthA = 0, WidthB = 0;
1095 
1096  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1097  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1098 
1099  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1100  MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1101  return false;
1102 
1103  // Retrieve the base register, offset from the base register and width. Width
1104  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1105  // base registers are identical, and the offset of a lower memory access +
1106  // the width doesn't overlap the offset of a higher memory access,
1107  // then the memory accesses are different.
1108  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
1109  getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
1110  if (BaseRegA == BaseRegB) {
1111  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1112  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1113  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1114  if (LowOffset + LowWidth <= HighOffset)
1115  return true;
1116  }
1117  }
1118  return false;
1119 }
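// For illustration (base register and offsets are made up): a 4-byte store at
// [x1 + 4] and another 4-byte store at [x1 + 8] share the base x1, and the
// lower access ends at 4 + 4 <= 8, so they are reported disjoint; a store at
// [x1 + 6] would overlap and the function conservatively returns false.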
1120 
1121 /// analyzeCompare - For a comparison instruction, return the source registers
1122 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1123 /// Return true if the comparison instruction can be analyzed.
1124 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
1125  unsigned &SrcReg2, int &CmpMask,
1126  int &CmpValue) const {
1127  // The first operand can be a frame index where we'd normally expect a
1128  // register.
1129  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1130  if (!MI.getOperand(1).isReg())
1131  return false;
1132 
1133  switch (MI.getOpcode()) {
1134  default:
1135  break;
1136  case AArch64::SUBSWrr:
1137  case AArch64::SUBSWrs:
1138  case AArch64::SUBSWrx:
1139  case AArch64::SUBSXrr:
1140  case AArch64::SUBSXrs:
1141  case AArch64::SUBSXrx:
1142  case AArch64::ADDSWrr:
1143  case AArch64::ADDSWrs:
1144  case AArch64::ADDSWrx:
1145  case AArch64::ADDSXrr:
1146  case AArch64::ADDSXrs:
1147  case AArch64::ADDSXrx:
1148  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1149  SrcReg = MI.getOperand(1).getReg();
1150  SrcReg2 = MI.getOperand(2).getReg();
1151  CmpMask = ~0;
1152  CmpValue = 0;
1153  return true;
1154  case AArch64::SUBSWri:
1155  case AArch64::ADDSWri:
1156  case AArch64::SUBSXri:
1157  case AArch64::ADDSXri:
1158  SrcReg = MI.getOperand(1).getReg();
1159  SrcReg2 = 0;
1160  CmpMask = ~0;
1161  // FIXME: In order to convert CmpValue to 0 or 1
1162  CmpValue = MI.getOperand(2).getImm() != 0;
1163  return true;
1164  case AArch64::ANDSWri:
1165  case AArch64::ANDSXri:
1166  // ANDS does not use the same encoding scheme as the others xxxS
1167  // instructions.
1168  SrcReg = MI.getOperand(1).getReg();
1169  SrcReg2 = 0;
1170  CmpMask = ~0;
1171  // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
1172  // while the type of CmpValue is int. When converting uint64_t to int,
1173  // the high 32 bits of uint64_t will be lost.
1174  // In fact it causes a bug in spec2006-483.xalancbmk
1175  // CmpValue is only used to compare with zero in OptimizeCompareInstr
1176  CmpValue = AArch64_AM::decodeLogicalImmediate(
1177  MI.getOperand(2).getImm(),
1178  MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1179  return true;
1180  }
1181 
1182  return false;
1183 }
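// For illustration: for a compare such as "cmp w0, #42" (a SUBSWri) this
// returns SrcReg = w0, SrcReg2 = 0, CmpMask = ~0 and CmpValue = 1 -- per the
// FIXME above, any non-zero immediate is flattened to 1, so
// optimizeCompareInstr() below only ever acts on true compare-with-zero cases.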
1184 
1185 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1186  MachineBasicBlock *MBB = Instr.getParent();
1187  assert(MBB && "Can't get MachineBasicBlock here");
1188  MachineFunction *MF = MBB->getParent();
1189  assert(MF && "Can't get MachineFunction here");
1190  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1191  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1192  MachineRegisterInfo *MRI = &MF->getRegInfo();
1193 
1194  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1195  ++OpIdx) {
1196  MachineOperand &MO = Instr.getOperand(OpIdx);
1197  const TargetRegisterClass *OpRegCstraints =
1198  Instr.getRegClassConstraint(OpIdx, TII, TRI);
1199 
1200  // If there's no constraint, there's nothing to do.
1201  if (!OpRegCstraints)
1202  continue;
1203  // If the operand is a frame index, there's nothing to do here.
1204  // A frame index operand will resolve correctly during PEI.
1205  if (MO.isFI())
1206  continue;
1207 
1208  assert(MO.isReg() &&
1209  "Operand has register constraints without being a register!");
1210 
1211  unsigned Reg = MO.getReg();
1212  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1213  if (!OpRegCstraints->contains(Reg))
1214  return false;
1215  } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1216  !MRI->constrainRegClass(Reg, OpRegCstraints))
1217  return false;
1218  }
1219 
1220  return true;
1221 }
1222 
1223 /// Return the opcode that does not set flags when possible - otherwise
1224 /// return the original opcode. The caller is responsible to do the actual
1225 /// substitution and legality checking.
1226 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1227  // Don't convert all compare instructions, because for some the zero register
1228  // encoding becomes the sp register.
1229  bool MIDefinesZeroReg = false;
1230  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1231  MIDefinesZeroReg = true;
1232 
1233  switch (MI.getOpcode()) {
1234  default:
1235  return MI.getOpcode();
1236  case AArch64::ADDSWrr:
1237  return AArch64::ADDWrr;
1238  case AArch64::ADDSWri:
1239  return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1240  case AArch64::ADDSWrs:
1241  return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1242  case AArch64::ADDSWrx:
1243  return AArch64::ADDWrx;
1244  case AArch64::ADDSXrr:
1245  return AArch64::ADDXrr;
1246  case AArch64::ADDSXri:
1247  return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1248  case AArch64::ADDSXrs:
1249  return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1250  case AArch64::ADDSXrx:
1251  return AArch64::ADDXrx;
1252  case AArch64::SUBSWrr:
1253  return AArch64::SUBWrr;
1254  case AArch64::SUBSWri:
1255  return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1256  case AArch64::SUBSWrs:
1257  return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1258  case AArch64::SUBSWrx:
1259  return AArch64::SUBWrx;
1260  case AArch64::SUBSXrr:
1261  return AArch64::SUBXrr;
1262  case AArch64::SUBSXri:
1263  return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1264  case AArch64::SUBSXrs:
1265  return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1266  case AArch64::SUBSXrx:
1267  return AArch64::SUBXrx;
1268  }
1269 }
1270 
1271 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1272 
1273 /// True when condition flags are accessed (either by writing or reading)
1274 /// on the instruction trace starting at From and ending at To.
1275 ///
1276 /// Note: If From and To are from different blocks it's assumed CC are accessed
1277 /// on the path.
1278 static bool areCFlagsAccessedBetweenInstrs(
1279  MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1280  const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1281  // Early exit if To is at the beginning of the BB.
1282  if (To == To->getParent()->begin())
1283  return true;
1284 
1285  // Check whether the instructions are in the same basic block
1286  // If not, assume the condition flags might get modified somewhere.
1287  if (To->getParent() != From->getParent())
1288  return true;
1289 
1290  // From must be above To.
1291  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1292  [From](MachineInstr &MI) {
1293  return MI.getIterator() == From;
1294  }) != To->getParent()->rend());
1295 
1296  // We iterate backward starting \p To until we hit \p From.
1297  for (--To; To != From; --To) {
1298  const MachineInstr &Instr = *To;
1299 
1300  if (((AccessToCheck & AK_Write) &&
1301  Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1302  ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1303  return true;
1304  }
1305  return false;
1306 }
1307 
1308 /// Try to optimize a compare instruction. A compare instruction is an
1309 /// instruction which produces AArch64::NZCV. It is a true compare
1310 /// instruction only when there are no uses of its destination
1311 /// register.
1312 ///
1313 /// The following steps are tried in order:
1314 /// 1. Convert CmpInstr into an unconditional version.
1315 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1316 /// condition code or an instruction which can be converted into such an
1317 /// instruction.
1318 /// Only comparison with zero is supported.
1319 bool AArch64InstrInfo::optimizeCompareInstr(
1320  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1321  int CmpValue, const MachineRegisterInfo *MRI) const {
1322  assert(CmpInstr.getParent());
1323  assert(MRI);
1324 
1325  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1326  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1327  if (DeadNZCVIdx != -1) {
1328  if (CmpInstr.definesRegister(AArch64::WZR) ||
1329  CmpInstr.definesRegister(AArch64::XZR)) {
1330  CmpInstr.eraseFromParent();
1331  return true;
1332  }
1333  unsigned Opc = CmpInstr.getOpcode();
1334  unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1335  if (NewOpc == Opc)
1336  return false;
1337  const MCInstrDesc &MCID = get(NewOpc);
1338  CmpInstr.setDesc(MCID);
1339  CmpInstr.RemoveOperand(DeadNZCVIdx);
1340  bool succeeded = UpdateOperandRegClass(CmpInstr);
1341  (void)succeeded;
1342  assert(succeeded && "Some operands reg class are incompatible!");
1343  return true;
1344  }
1345 
1346  // Continue only if we have a "ri" where immediate is zero.
1347  // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1348  // function.
1349  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1350  if (CmpValue != 0 || SrcReg2 != 0)
1351  return false;
1352 
1353  // CmpInstr is a Compare instruction if destination register is not used.
1354  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1355  return false;
1356 
1357  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1358 }
1359 
1360 /// Get opcode of S version of Instr.
1361 /// If Instr is S version its opcode is returned.
1362 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1363 /// or we are not interested in it.
1364 static unsigned sForm(MachineInstr &Instr) {
1365  switch (Instr.getOpcode()) {
1366  default:
1367  return AArch64::INSTRUCTION_LIST_END;
1368 
1369  case AArch64::ADDSWrr:
1370  case AArch64::ADDSWri:
1371  case AArch64::ADDSXrr:
1372  case AArch64::ADDSXri:
1373  case AArch64::SUBSWrr:
1374  case AArch64::SUBSWri:
1375  case AArch64::SUBSXrr:
1376  case AArch64::SUBSXri:
1377  return Instr.getOpcode();
1378 
1379  case AArch64::ADDWrr:
1380  return AArch64::ADDSWrr;
1381  case AArch64::ADDWri:
1382  return AArch64::ADDSWri;
1383  case AArch64::ADDXrr:
1384  return AArch64::ADDSXrr;
1385  case AArch64::ADDXri:
1386  return AArch64::ADDSXri;
1387  case AArch64::ADCWr:
1388  return AArch64::ADCSWr;
1389  case AArch64::ADCXr:
1390  return AArch64::ADCSXr;
1391  case AArch64::SUBWrr:
1392  return AArch64::SUBSWrr;
1393  case AArch64::SUBWri:
1394  return AArch64::SUBSWri;
1395  case AArch64::SUBXrr:
1396  return AArch64::SUBSXrr;
1397  case AArch64::SUBXri:
1398  return AArch64::SUBSXri;
1399  case AArch64::SBCWr:
1400  return AArch64::SBCSWr;
1401  case AArch64::SBCXr:
1402  return AArch64::SBCSXr;
1403  case AArch64::ANDWri:
1404  return AArch64::ANDSWri;
1405  case AArch64::ANDXri:
1406  return AArch64::ANDSXri;
1407  }
1408 }
1409 
1410 /// Check if AArch64::NZCV should be alive in successors of MBB.
1411 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1412  for (auto *BB : MBB->successors())
1413  if (BB->isLiveIn(AArch64::NZCV))
1414  return true;
1415  return false;
1416 }
1417 
1418 namespace {
1419 
1420 struct UsedNZCV {
1421  bool N = false;
1422  bool Z = false;
1423  bool C = false;
1424  bool V = false;
1425 
1426  UsedNZCV() = default;
1427 
1428  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1429  this->N |= UsedFlags.N;
1430  this->Z |= UsedFlags.Z;
1431  this->C |= UsedFlags.C;
1432  this->V |= UsedFlags.V;
1433  return *this;
1434  }
1435 };
1436 
1437 } // end anonymous namespace
1438 
1439 /// Find a condition code used by the instruction.
1440 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1441 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1442 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1443  switch (Instr.getOpcode()) {
1444  default:
1445  return AArch64CC::Invalid;
1446 
1447  case AArch64::Bcc: {
1448  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1449  assert(Idx >= 2);
1450  return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1451  }
1452 
1453  case AArch64::CSINVWr:
1454  case AArch64::CSINVXr:
1455  case AArch64::CSINCWr:
1456  case AArch64::CSINCXr:
1457  case AArch64::CSELWr:
1458  case AArch64::CSELXr:
1459  case AArch64::CSNEGWr:
1460  case AArch64::CSNEGXr:
1461  case AArch64::FCSELSrrr:
1462  case AArch64::FCSELDrrr: {
1463  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1464  assert(Idx >= 1);
1465  return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1466  }
1467  }
1468 }
1469 
1470 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1471  assert(CC != AArch64CC::Invalid);
1472  UsedNZCV UsedFlags;
1473  switch (CC) {
1474  default:
1475  break;
1476 
1477  case AArch64CC::EQ: // Z set
1478  case AArch64CC::NE: // Z clear
1479  UsedFlags.Z = true;
1480  break;
1481 
1482  case AArch64CC::HI: // Z clear and C set
1483  case AArch64CC::LS: // Z set or C clear
1484  UsedFlags.Z = true;
1485  LLVM_FALLTHROUGH;
1486  case AArch64CC::HS: // C set
1487  case AArch64CC::LO: // C clear
1488  UsedFlags.C = true;
1489  break;
1490 
1491  case AArch64CC::MI: // N set
1492  case AArch64CC::PL: // N clear
1493  UsedFlags.N = true;
1494  break;
1495 
1496  case AArch64CC::VS: // V set
1497  case AArch64CC::VC: // V clear
1498  UsedFlags.V = true;
1499  break;
1500 
1501  case AArch64CC::GT: // Z clear, N and V the same
1502  case AArch64CC::LE: // Z set, N and V differ
1503  UsedFlags.Z = true;
1504  LLVM_FALLTHROUGH;
1505  case AArch64CC::GE: // N and V the same
1506  case AArch64CC::LT: // N and V differ
1507  UsedFlags.N = true;
1508  UsedFlags.V = true;
1509  break;
1510  }
1511  return UsedFlags;
1512 }
1513 
1514 static bool isADDSRegImm(unsigned Opcode) {
1515  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1516 }
1517 
1518 static bool isSUBSRegImm(unsigned Opcode) {
1519  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1520 }
1521 
1522 /// Check if CmpInstr can be substituted by MI.
1523 ///
1524 /// CmpInstr can be substituted:
1525 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1526 /// - and, MI and CmpInstr are from the same MachineBB
1527 /// - and, condition flags are not alive in successors of the CmpInstr parent
1528 /// - and, if MI opcode is the S form there must be no defs of flags between
1529 /// MI and CmpInstr
1530 /// or if MI opcode is not the S form there must be neither defs of flags
1531 /// nor uses of flags between MI and CmpInstr.
1532 /// - and C/V flags are not used after CmpInstr
1533 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1534  const TargetRegisterInfo *TRI) {
1535  assert(MI);
1536  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1537  assert(CmpInstr);
1538 
1539  const unsigned CmpOpcode = CmpInstr->getOpcode();
1540  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1541  return false;
1542 
1543  if (MI->getParent() != CmpInstr->getParent())
1544  return false;
1545 
1546  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1547  return false;
1548 
1549  AccessKind AccessToCheck = AK_Write;
1550  if (sForm(*MI) != MI->getOpcode())
1551  AccessToCheck = AK_All;
1552  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1553  return false;
1554 
1555  UsedNZCV NZCVUsedAfterCmp;
1556  for (auto I = std::next(CmpInstr->getIterator()),
1557  E = CmpInstr->getParent()->instr_end();
1558  I != E; ++I) {
1559  const MachineInstr &Instr = *I;
1560  if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1561  AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1562  if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1563  return false;
1564  NZCVUsedAfterCmp |= getUsedNZCV(CC);
1565  }
1566 
1567  if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1568  break;
1569  }
1570 
1571  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1572 }
1573 
1574 /// Substitute an instruction comparing to zero with another instruction
1575 /// which produces needed condition flags.
1576 ///
1577 /// Return true on success.
1578 bool AArch64InstrInfo::substituteCmpToZero(
1579  MachineInstr &CmpInstr, unsigned SrcReg,
1580  const MachineRegisterInfo *MRI) const {
1581  assert(MRI);
1582  // Get the unique definition of SrcReg.
1583  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1584  if (!MI)
1585  return false;
1586 
1587  const TargetRegisterInfo *TRI = &getRegisterInfo();
1588 
1589  unsigned NewOpc = sForm(*MI);
1590  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1591  return false;
1592 
1593  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1594  return false;
1595 
1596  // Update the instruction to set NZCV.
1597  MI->setDesc(get(NewOpc));
1598  CmpInstr.eraseFromParent();
1599  bool succeeded = UpdateOperandRegClass(*MI);
1600  (void)succeeded;
1601  assert(succeeded && "Some operands reg class are incompatible!");
1602  MI->addRegisterDefined(AArch64::NZCV, TRI);
1603  return true;
1604 }
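// For illustration (virtual registers are made up), the rewrite performed above:
//   %2 = SUBWrr %0, %1
//   %3 = SUBSWri %2, 0, 0      ; cmp %2, #0, with %3 otherwise unused
//   Bcc NE, %bb.1
// becomes
//   %2 = SUBSWrr %0, %1        ; now sets NZCV directly
//   Bcc NE, %bb.1
// which is legal because NE only reads Z; had the branch used HI or VS, the
// C/V check in canInstrSubstituteCmpInstr() would have rejected the transform.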
1605 
1606 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1607  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
1608  return false;
1609 
1610  MachineBasicBlock &MBB = *MI.getParent();
1611  DebugLoc DL = MI.getDebugLoc();
1612  unsigned Reg = MI.getOperand(0).getReg();
1613  const GlobalValue *GV =
1614  cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1615  const TargetMachine &TM = MBB.getParent()->getTarget();
1616  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1617  const unsigned char MO_NC = AArch64II::MO_NC;
1618 
1619  if ((OpFlags & AArch64II::MO_GOT) != 0) {
1620  BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1621  .addGlobalAddress(GV, 0, OpFlags);
1622  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1623  .addReg(Reg, RegState::Kill)
1624  .addImm(0)
1625  .addMemOperand(*MI.memoperands_begin());
1626  } else if (TM.getCodeModel() == CodeModel::Large) {
1627  BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1628  .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1629  .addImm(0);
1630  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1631  .addReg(Reg, RegState::Kill)
1632  .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1633  .addImm(16);
1634  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1635  .addReg(Reg, RegState::Kill)
1636  .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1637  .addImm(32);
1638  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1639  .addReg(Reg, RegState::Kill)
1640  .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1641  .addImm(48);
1642  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1643  .addReg(Reg, RegState::Kill)
1644  .addImm(0)
1645  .addMemOperand(*MI.memoperands_begin());
1646  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1647  BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1648  .addGlobalAddress(GV, 0, OpFlags);
1649  } else {
1650  BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1651  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1652  unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1653  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1654  .addReg(Reg, RegState::Kill)
1655  .addGlobalAddress(GV, 0, LoFlags)
1656  .addMemOperand(*MI.memoperands_begin());
1657  }
1658 
1659  MBB.erase(MI);
1660 
1661  return true;
1662 }
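// For illustration, with the small code model and a guard global such as
// __stack_chk_guard (the symbol name is an example), LOAD_STACK_GUARD becomes
//   adrp x0, __stack_chk_guard
//   ldr  x0, [x0, :lo12:__stack_chk_guard]
// while the large code model uses the movz/movk sequence above and a GOT
// reference goes through LOADgot followed by an extra load.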
1663 
1664 /// Return true if this instruction has a non-zero immediate operand.
1665 bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
1666  switch (MI.getOpcode()) {
1667  default:
1668  break;
1669  case AArch64::ADDSWrs:
1670  case AArch64::ADDSXrs:
1671  case AArch64::ADDWrs:
1672  case AArch64::ADDXrs:
1673  case AArch64::ANDSWrs:
1674  case AArch64::ANDSXrs:
1675  case AArch64::ANDWrs:
1676  case AArch64::ANDXrs:
1677  case AArch64::BICSWrs:
1678  case AArch64::BICSXrs:
1679  case AArch64::BICWrs:
1680  case AArch64::BICXrs:
1681  case AArch64::EONWrs:
1682  case AArch64::EONXrs:
1683  case AArch64::EORWrs:
1684  case AArch64::EORXrs:
1685  case AArch64::ORNWrs:
1686  case AArch64::ORNXrs:
1687  case AArch64::ORRWrs:
1688  case AArch64::ORRXrs:
1689  case AArch64::SUBSWrs:
1690  case AArch64::SUBSXrs:
1691  case AArch64::SUBWrs:
1692  case AArch64::SUBXrs:
1693  if (MI.getOperand(3).isImm()) {
1694  unsigned val = MI.getOperand(3).getImm();
1695  return (val != 0);
1696  }
1697  break;
1698  }
1699  return false;
1700 }
1701 
1702 /// Return true if this instruction has a non-zero immediate operand.
1703 bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
1704  switch (MI.getOpcode()) {
1705  default:
1706  break;
1707  case AArch64::ADDSWrx:
1708  case AArch64::ADDSXrx:
1709  case AArch64::ADDSXrx64:
1710  case AArch64::ADDWrx:
1711  case AArch64::ADDXrx:
1712  case AArch64::ADDXrx64:
1713  case AArch64::SUBSWrx:
1714  case AArch64::SUBSXrx:
1715  case AArch64::SUBSXrx64:
1716  case AArch64::SUBWrx:
1717  case AArch64::SUBXrx:
1718  case AArch64::SUBXrx64:
1719  if (MI.getOperand(3).isImm()) {
1720  unsigned val = MI.getOperand(3).getImm();
1721  return (val != 0);
1722  }
1723  break;
1724  }
1725 
1726  return false;
1727 }
1728 
1729 // Return true if this instruction simply sets its single destination register
1730 // to zero. This is equivalent to a register rename of the zero-register.
1731 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1732  switch (MI.getOpcode()) {
1733  default:
1734  break;
1735  case AArch64::MOVZWi:
1736  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1737  if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1738  assert(MI.getDesc().getNumOperands() == 3 &&
1739  MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1740  return true;
1741  }
1742  break;
1743  case AArch64::ANDWri: // and Rd, Rzr, #imm
1744  return MI.getOperand(1).getReg() == AArch64::WZR;
1745  case AArch64::ANDXri:
1746  return MI.getOperand(1).getReg() == AArch64::XZR;
1747  case TargetOpcode::COPY:
1748  return MI.getOperand(1).getReg() == AArch64::WZR;
1749  }
1750  return false;
1751 }
1752 
1753 // Return true if this instruction simply renames a general register without
1754 // modifying bits.
1755 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1756  switch (MI.getOpcode()) {
1757  default:
1758  break;
1759  case TargetOpcode::COPY: {
1760  // GPR32 copies will be lowered to ORRXrs
1761  unsigned DstReg = MI.getOperand(0).getReg();
1762  return (AArch64::GPR32RegClass.contains(DstReg) ||
1763  AArch64::GPR64RegClass.contains(DstReg));
1764  }
1765  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1766  if (MI.getOperand(1).getReg() == AArch64::XZR) {
1767  assert(MI.getDesc().getNumOperands() == 4 &&
1768  MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1769  return true;
1770  }
1771  break;
1772  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1773  if (MI.getOperand(2).getImm() == 0) {
1774  assert(MI.getDesc().getNumOperands() == 4 &&
1775  MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1776  return true;
1777  }
1778  break;
1779  }
1780  return false;
1781 }
1782 
1783 // Return true if this instruction simply renames a floating-point register
1784 // without modifying bits.
1785 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1786  switch (MI.getOpcode()) {
1787  default:
1788  break;
1789  case TargetOpcode::COPY: {
1790  // FPR64 copies will be lowered to ORR.16b
1791  unsigned DstReg = MI.getOperand(0).getReg();
1792  return (AArch64::FPR64RegClass.contains(DstReg) ||
1793  AArch64::FPR128RegClass.contains(DstReg));
1794  }
1795  case AArch64::ORRv16i8:
1796  if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1797  assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1798  "invalid ORRv16i8 operands");
1799  return true;
1800  }
1801  break;
1802  }
1803  return false;
1804 }
1805 
1806 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1807  int &FrameIndex) const {
1808  switch (MI.getOpcode()) {
1809  default:
1810  break;
1811  case AArch64::LDRWui:
1812  case AArch64::LDRXui:
1813  case AArch64::LDRBui:
1814  case AArch64::LDRHui:
1815  case AArch64::LDRSui:
1816  case AArch64::LDRDui:
1817  case AArch64::LDRQui:
1818  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1819  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1820  FrameIndex = MI.getOperand(1).getIndex();
1821  return MI.getOperand(0).getReg();
1822  }
1823  break;
1824  }
1825 
1826  return 0;
1827 }
1828 
1829 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1830  int &FrameIndex) const {
1831  switch (MI.getOpcode()) {
1832  default:
1833  break;
1834  case AArch64::STRWui:
1835  case AArch64::STRXui:
1836  case AArch64::STRBui:
1837  case AArch64::STRHui:
1838  case AArch64::STRSui:
1839  case AArch64::STRDui:
1840  case AArch64::STRQui:
1841  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1842  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1843  FrameIndex = MI.getOperand(1).getIndex();
1844  return MI.getOperand(0).getReg();
1845  }
1846  break;
1847  }
1848  return 0;
1849 }
1850 
1851 /// Return true if this load/store scales or extends its register offset.
1852 /// This refers to scaling a dynamic index as opposed to scaled immediates.
1853 /// MI should be a memory op that allows scaled addressing.
1854 bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
1855  switch (MI.getOpcode()) {
1856  default:
1857  break;
1858  case AArch64::LDRBBroW:
1859  case AArch64::LDRBroW:
1860  case AArch64::LDRDroW:
1861  case AArch64::LDRHHroW:
1862  case AArch64::LDRHroW:
1863  case AArch64::LDRQroW:
1864  case AArch64::LDRSBWroW:
1865  case AArch64::LDRSBXroW:
1866  case AArch64::LDRSHWroW:
1867  case AArch64::LDRSHXroW:
1868  case AArch64::LDRSWroW:
1869  case AArch64::LDRSroW:
1870  case AArch64::LDRWroW:
1871  case AArch64::LDRXroW:
1872  case AArch64::STRBBroW:
1873  case AArch64::STRBroW:
1874  case AArch64::STRDroW:
1875  case AArch64::STRHHroW:
1876  case AArch64::STRHroW:
1877  case AArch64::STRQroW:
1878  case AArch64::STRSroW:
1879  case AArch64::STRWroW:
1880  case AArch64::STRXroW:
1881  case AArch64::LDRBBroX:
1882  case AArch64::LDRBroX:
1883  case AArch64::LDRDroX:
1884  case AArch64::LDRHHroX:
1885  case AArch64::LDRHroX:
1886  case AArch64::LDRQroX:
1887  case AArch64::LDRSBWroX:
1888  case AArch64::LDRSBXroX:
1889  case AArch64::LDRSHWroX:
1890  case AArch64::LDRSHXroX:
1891  case AArch64::LDRSWroX:
1892  case AArch64::LDRSroX:
1893  case AArch64::LDRWroX:
1894  case AArch64::LDRXroX:
1895  case AArch64::STRBBroX:
1896  case AArch64::STRBroX:
1897  case AArch64::STRDroX:
1898  case AArch64::STRHHroX:
1899  case AArch64::STRHroX:
1900  case AArch64::STRQroX:
1901  case AArch64::STRSroX:
1902  case AArch64::STRWroX:
1903  case AArch64::STRXroX:
1904 
1905  unsigned Val = MI.getOperand(3).getImm();
1906  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1907  return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1908  }
1909  return false;
1910 }
1911 
1912 /// Check all MachineMemOperands for a hint to suppress pairing.
1913 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1914  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1915  return MMO->getFlags() & MOSuppressPair;
1916  });
1917 }
1918 
1919 /// Set a flag on the first MachineMemOperand to suppress pairing.
1920 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1921  if (MI.memoperands_empty())
1922  return;
1923  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1924 }
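// Example (sketch): the intended protocol for this hint, as used by the
// AArch64StorePairSuppress pass; `TII` and `MI` are assumed to be in scope.
//
//   if (!TII.isLdStPairSuppressed(MI))
//     TII.suppressLdStPair(MI); // later pairing passes will now skip MI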
1925 
1926 /// Check all MachineMemOperands for a hint that the load/store is strided.
1927 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1928  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1929  return MMO->getFlags() & MOStridedAccess;
1930  });
1931 }
1932 
1933 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1934  switch (Opc) {
1935  default:
1936  return false;
1937  case AArch64::STURSi:
1938  case AArch64::STURDi:
1939  case AArch64::STURQi:
1940  case AArch64::STURBBi:
1941  case AArch64::STURHHi:
1942  case AArch64::STURWi:
1943  case AArch64::STURXi:
1944  case AArch64::LDURSi:
1945  case AArch64::LDURDi:
1946  case AArch64::LDURQi:
1947  case AArch64::LDURWi:
1948  case AArch64::LDURXi:
1949  case AArch64::LDURSWi:
1950  case AArch64::LDURHHi:
1951  case AArch64::LDURBBi:
1952  case AArch64::LDURSBWi:
1953  case AArch64::LDURSHWi:
1954  return true;
1955  }
1956 }
1957 
1958 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1959  switch (MI.getOpcode()) {
1960  default:
1961  return false;
1962  // Scaled instructions.
1963  case AArch64::STRSui:
1964  case AArch64::STRDui:
1965  case AArch64::STRQui:
1966  case AArch64::STRXui:
1967  case AArch64::STRWui:
1968  case AArch64::LDRSui:
1969  case AArch64::LDRDui:
1970  case AArch64::LDRQui:
1971  case AArch64::LDRXui:
1972  case AArch64::LDRWui:
1973  case AArch64::LDRSWui:
1974  // Unscaled instructions.
1975  case AArch64::STURSi:
1976  case AArch64::STURDi:
1977  case AArch64::STURQi:
1978  case AArch64::STURWi:
1979  case AArch64::STURXi:
1980  case AArch64::LDURSi:
1981  case AArch64::LDURDi:
1982  case AArch64::LDURQi:
1983  case AArch64::LDURWi:
1984  case AArch64::LDURXi:
1985  case AArch64::LDURSWi:
1986  return true;
1987  }
1988 }
1989 
1990 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1991  bool &Is64Bit) {
1992  switch (Opc) {
1993  default:
1994  llvm_unreachable("Opcode has no flag setting equivalent!");
1995  // 32-bit cases:
1996  case AArch64::ADDWri:
1997  Is64Bit = false;
1998  return AArch64::ADDSWri;
1999  case AArch64::ADDWrr:
2000  Is64Bit = false;
2001  return AArch64::ADDSWrr;
2002  case AArch64::ADDWrs:
2003  Is64Bit = false;
2004  return AArch64::ADDSWrs;
2005  case AArch64::ADDWrx:
2006  Is64Bit = false;
2007  return AArch64::ADDSWrx;
2008  case AArch64::ANDWri:
2009  Is64Bit = false;
2010  return AArch64::ANDSWri;
2011  case AArch64::ANDWrr:
2012  Is64Bit = false;
2013  return AArch64::ANDSWrr;
2014  case AArch64::ANDWrs:
2015  Is64Bit = false;
2016  return AArch64::ANDSWrs;
2017  case AArch64::BICWrr:
2018  Is64Bit = false;
2019  return AArch64::BICSWrr;
2020  case AArch64::BICWrs:
2021  Is64Bit = false;
2022  return AArch64::BICSWrs;
2023  case AArch64::SUBWri:
2024  Is64Bit = false;
2025  return AArch64::SUBSWri;
2026  case AArch64::SUBWrr:
2027  Is64Bit = false;
2028  return AArch64::SUBSWrr;
2029  case AArch64::SUBWrs:
2030  Is64Bit = false;
2031  return AArch64::SUBSWrs;
2032  case AArch64::SUBWrx:
2033  Is64Bit = false;
2034  return AArch64::SUBSWrx;
2035  // 64-bit cases:
2036  case AArch64::ADDXri:
2037  Is64Bit = true;
2038  return AArch64::ADDSXri;
2039  case AArch64::ADDXrr:
2040  Is64Bit = true;
2041  return AArch64::ADDSXrr;
2042  case AArch64::ADDXrs:
2043  Is64Bit = true;
2044  return AArch64::ADDSXrs;
2045  case AArch64::ADDXrx:
2046  Is64Bit = true;
2047  return AArch64::ADDSXrx;
2048  case AArch64::ANDXri:
2049  Is64Bit = true;
2050  return AArch64::ANDSXri;
2051  case AArch64::ANDXrr:
2052  Is64Bit = true;
2053  return AArch64::ANDSXrr;
2054  case AArch64::ANDXrs:
2055  Is64Bit = true;
2056  return AArch64::ANDSXrs;
2057  case AArch64::BICXrr:
2058  Is64Bit = true;
2059  return AArch64::BICSXrr;
2060  case AArch64::BICXrs:
2061  Is64Bit = true;
2062  return AArch64::BICSXrs;
2063  case AArch64::SUBXri:
2064  Is64Bit = true;
2065  return AArch64::SUBSXri;
2066  case AArch64::SUBXrr:
2067  Is64Bit = true;
2068  return AArch64::SUBSXrr;
2069  case AArch64::SUBXrs:
2070  Is64Bit = true;
2071  return AArch64::SUBSXrs;
2072  case AArch64::SUBXrx:
2073  Is64Bit = true;
2074  return AArch64::SUBSXrx;
2075  }
2076 }
2077 
2078 // Is this a candidate for ld/st merging or pairing? For example, we don't
2079 // touch volatiles or load/stores that have a hint to avoid pair formation.
2080 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
2081  // If this is a volatile load/store, don't mess with it.
2082  if (MI.hasOrderedMemoryRef())
2083  return false;
2084 
2085  // Make sure this is a reg+imm (as opposed to an address reloc).
2086  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
2087  if (!MI.getOperand(2).isImm())
2088  return false;
2089 
2090  // Can't merge/pair if the instruction modifies the base register.
2091  // e.g., ldr x0, [x0]
2092  unsigned BaseReg = MI.getOperand(1).getReg();
2093  const TargetRegisterInfo *TRI = &getRegisterInfo();
2094  if (MI.modifiesRegister(BaseReg, TRI))
2095  return false;
2096 
2097  // Check if this load/store has a hint to avoid pair formation.
2098  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2099  if (isLdStPairSuppressed(MI))
2100  return false;
2101 
2102  // On some CPUs quad load/store pairs are slower than two single load/stores.
2103  if (Subtarget.isPaired128Slow()) {
2104  switch (MI.getOpcode()) {
2105  default:
2106  break;
2107  case AArch64::LDURQi:
2108  case AArch64::STURQi:
2109  case AArch64::LDRQui:
2110  case AArch64::STRQui:
2111  return false;
2112  }
2113  }
2114 
2115  return true;
2116 }
2117 
2118 bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
2119  MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
2120  const TargetRegisterInfo *TRI) const {
2121  unsigned Width;
2122  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
2123 }
2124 
2125 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
2126  MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
2127  const TargetRegisterInfo *TRI) const {
2128  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2129  // Handle only loads/stores with base register followed by immediate offset.
2130  if (LdSt.getNumExplicitOperands() == 3) {
2131  // Non-paired instruction (e.g., ldr x1, [x0, #8]).
2132  if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
2133  return false;
2134  } else if (LdSt.getNumExplicitOperands() == 4) {
2135  // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2136  if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
2137  !LdSt.getOperand(3).isImm())
2138  return false;
2139  } else
2140  return false;
2141 
2142  // Get the scaling factor for the instruction and set the width for the
2143  // instruction.
2144  unsigned Scale = 0;
2145  int64_t Dummy1, Dummy2;
2146 
2147  // If this returns false, then it's an instruction we don't want to handle.
2148  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2149  return false;
2150 
2151  // Compute the offset. Offset is calculated as the immediate operand
2152  // multiplied by the scaling factor. Unscaled instructions have scaling factor
2153  // set to 1.
2154  if (LdSt.getNumExplicitOperands() == 3) {
2155  BaseReg = LdSt.getOperand(1).getReg();
2156  Offset = LdSt.getOperand(2).getImm() * Scale;
2157  } else {
2158  assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2159  BaseReg = LdSt.getOperand(2).getReg();
2160  Offset = LdSt.getOperand(3).getImm() * Scale;
2161  }
2162  return true;
2163 }
2164 
2165 MachineOperand &AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(
2166  MachineInstr &LdSt) const {
2167  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2168  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2169  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2170  return OfsOp;
2171 }
2172 
2173 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
2174  unsigned &Width, int64_t &MinOffset,
2175  int64_t &MaxOffset) const {
2176  switch (Opcode) {
2177  // Not a memory operation or something we want to handle.
2178  default:
2179  Scale = Width = 0;
2180  MinOffset = MaxOffset = 0;
2181  return false;
2182  case AArch64::STRWpost:
2183  case AArch64::LDRWpost:
2184  Width = 32;
2185  Scale = 4;
2186  MinOffset = -256;
2187  MaxOffset = 255;
2188  break;
2189  case AArch64::LDURQi:
2190  case AArch64::STURQi:
2191  Width = 16;
2192  Scale = 1;
2193  MinOffset = -256;
2194  MaxOffset = 255;
2195  break;
2196  case AArch64::LDURXi:
2197  case AArch64::LDURDi:
2198  case AArch64::STURXi:
2199  case AArch64::STURDi:
2200  Width = 8;
2201  Scale = 1;
2202  MinOffset = -256;
2203  MaxOffset = 255;
2204  break;
2205  case AArch64::LDURWi:
2206  case AArch64::LDURSi:
2207  case AArch64::LDURSWi:
2208  case AArch64::STURWi:
2209  case AArch64::STURSi:
2210  Width = 4;
2211  Scale = 1;
2212  MinOffset = -256;
2213  MaxOffset = 255;
2214  break;
2215  case AArch64::LDURHi:
2216  case AArch64::LDURHHi:
2217  case AArch64::LDURSHXi:
2218  case AArch64::LDURSHWi:
2219  case AArch64::STURHi:
2220  case AArch64::STURHHi:
2221  Width = 2;
2222  Scale = 1;
2223  MinOffset = -256;
2224  MaxOffset = 255;
2225  break;
2226  case AArch64::LDURBi:
2227  case AArch64::LDURBBi:
2228  case AArch64::LDURSBXi:
2229  case AArch64::LDURSBWi:
2230  case AArch64::STURBi:
2231  case AArch64::STURBBi:
2232  Width = 1;
2233  Scale = 1;
2234  MinOffset = -256;
2235  MaxOffset = 255;
2236  break;
2237  case AArch64::LDPQi:
2238  case AArch64::LDNPQi:
2239  case AArch64::STPQi:
2240  case AArch64::STNPQi:
2241  Scale = 16;
2242  Width = 32;
2243  MinOffset = -64;
2244  MaxOffset = 63;
2245  break;
2246  case AArch64::LDRQui:
2247  case AArch64::STRQui:
2248  Scale = Width = 16;
2249  MinOffset = 0;
2250  MaxOffset = 4095;
2251  break;
2252  case AArch64::LDPXi:
2253  case AArch64::LDPDi:
2254  case AArch64::LDNPXi:
2255  case AArch64::LDNPDi:
2256  case AArch64::STPXi:
2257  case AArch64::STPDi:
2258  case AArch64::STNPXi:
2259  case AArch64::STNPDi:
2260  Scale = 8;
2261  Width = 16;
2262  MinOffset = -64;
2263  MaxOffset = 63;
2264  break;
2265  case AArch64::LDRXui:
2266  case AArch64::LDRDui:
2267  case AArch64::STRXui:
2268  case AArch64::STRDui:
2269  Scale = Width = 8;
2270  MinOffset = 0;
2271  MaxOffset = 4095;
2272  break;
2273  case AArch64::LDPWi:
2274  case AArch64::LDPSi:
2275  case AArch64::LDNPWi:
2276  case AArch64::LDNPSi:
2277  case AArch64::STPWi:
2278  case AArch64::STPSi:
2279  case AArch64::STNPWi:
2280  case AArch64::STNPSi:
2281  Scale = 4;
2282  Width = 8;
2283  MinOffset = -64;
2284  MaxOffset = 63;
2285  break;
2286  case AArch64::LDRWui:
2287  case AArch64::LDRSui:
2288  case AArch64::LDRSWui:
2289  case AArch64::STRWui:
2290  case AArch64::STRSui:
2291  Scale = Width = 4;
2292  MinOffset = 0;
2293  MaxOffset = 4095;
2294  break;
2295  case AArch64::LDRHui:
2296  case AArch64::LDRHHui:
2297  case AArch64::STRHui:
2298  case AArch64::STRHHui:
2299  Scale = Width = 2;
2300  MinOffset = 0;
2301  MaxOffset = 4095;
2302  break;
2303  case AArch64::LDRBui:
2304  case AArch64::LDRBBui:
2305  case AArch64::STRBui:
2306  case AArch64::STRBBui:
2307  Scale = Width = 1;
2308  MinOffset = 0;
2309  MaxOffset = 4095;
2310  break;
2311  }
2312 
2313  return true;
2314 }
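// Example (sketch): turning the values produced above into a byte-offset
// legality check. `TII` and `ByteOff` are assumed names; for AArch64::LDRXui
// this yields Scale = 8 and an immediate range of [0, 4095], i.e. byte
// offsets 0, 8, ..., 32760.
//
//   unsigned Scale = 0, Width = 0;
//   int64_t MinOff = 0, MaxOff = 0;
//   bool Encodable =
//       TII.getMemOpInfo(AArch64::LDRXui, Scale, Width, MinOff, MaxOff) &&
//       ByteOff % Scale == 0 && ByteOff / Scale >= MinOff &&
//       ByteOff / Scale <= MaxOff;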
2315 
2316 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
2317 // scaled.
2318 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2319  unsigned OffsetStride = 1;
2320  switch (Opc) {
2321  default:
2322  return false;
2323  case AArch64::LDURQi:
2324  case AArch64::STURQi:
2325  OffsetStride = 16;
2326  break;
2327  case AArch64::LDURXi:
2328  case AArch64::LDURDi:
2329  case AArch64::STURXi:
2330  case AArch64::STURDi:
2331  OffsetStride = 8;
2332  break;
2333  case AArch64::LDURWi:
2334  case AArch64::LDURSi:
2335  case AArch64::LDURSWi:
2336  case AArch64::STURWi:
2337  case AArch64::STURSi:
2338  OffsetStride = 4;
2339  break;
2340  }
2341  // If the byte-offset isn't a multiple of the stride, we can't scale this
2342  // offset.
2343  if (Offset % OffsetStride != 0)
2344  return false;
2345 
2346  // Convert the byte-offset used by unscaled into an "element" offset used
2347  // by the scaled pair load/store instructions.
2348  Offset /= OffsetStride;
2349  return true;
2350 }
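// Worked example (illustrative): for AArch64::LDURXi the stride is 8, so a
// byte offset of 24 is rewritten to element offset 3 and scaleOffset returns
// true, while a byte offset of 20 is not a multiple of 8 and the function
// returns false, which blocks pairing for that access.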
2351 
2352 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2353  if (FirstOpc == SecondOpc)
2354  return true;
2355  // We can also pair sign-ext and zero-ext instructions.
2356  switch (FirstOpc) {
2357  default:
2358  return false;
2359  case AArch64::LDRWui:
2360  case AArch64::LDURWi:
2361  return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2362  case AArch64::LDRSWui:
2363  case AArch64::LDURSWi:
2364  return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2365  }
2366  // These instructions can't be paired based on their opcodes.
2367  return false;
2368 }
2369 
2370 /// Detect opportunities for ldp/stp formation.
2371 ///
2372 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
2373 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
2374  unsigned BaseReg1,
2375  MachineInstr &SecondLdSt,
2376  unsigned BaseReg2,
2377  unsigned NumLoads) const {
2378  if (BaseReg1 != BaseReg2)
2379  return false;
2380 
2381  // Only cluster up to a single pair.
2382  if (NumLoads > 1)
2383  return false;
2384 
2385  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2386  return false;
2387 
2388  // Can we pair these instructions based on their opcodes?
2389  unsigned FirstOpc = FirstLdSt.getOpcode();
2390  unsigned SecondOpc = SecondLdSt.getOpcode();
2391  if (!canPairLdStOpc(FirstOpc, SecondOpc))
2392  return false;
2393 
2394  // Can't merge volatiles or load/stores that have a hint to avoid pair
2395  // formation, for example.
2396  if (!isCandidateToMergeOrPair(FirstLdSt) ||
2397  !isCandidateToMergeOrPair(SecondLdSt))
2398  return false;
2399 
2400  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2401  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2402  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2403  return false;
2404 
2405  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2406  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2407  return false;
2408 
2409  // Pairwise instructions have a 7-bit signed offset field.
2410  if (Offset1 > 63 || Offset1 < -64)
2411  return false;
2412 
2413  // The caller should already have ordered First/SecondLdSt by offset.
2414  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2415  return Offset1 + 1 == Offset2;
2416 }
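// Worked example (illustrative): two LDRXui loads off the same base register
// with scaled immediates 2 and 3 (byte offsets 16 and 24) pass every check
// above, so the scheduler keeps them adjacent and the load/store optimizer
// can later rewrite them as a single "ldp x?, x?, [base, #16]".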
2417 
2418 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2419  unsigned Reg, unsigned SubIdx,
2420  unsigned State,
2421  const TargetRegisterInfo *TRI) {
2422  if (!SubIdx)
2423  return MIB.addReg(Reg, State);
2424 
2425  if (TargetRegisterInfo::isPhysicalRegister(Reg))
2426  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2427  return MIB.addReg(Reg, State, SubIdx);
2428 }
2429 
2430 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2431  unsigned NumRegs) {
2432  // We really want the positive remainder mod 32 here; that happens to be
2433  // easily obtainable with a mask.
2434  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2435 }
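// Worked example (illustrative): the arguments are register *encodings*. When
// copying the tuple {q1,q2} from {q0,q1}, DestReg = 1, SrcReg = 0, NumRegs = 2,
// so ((1 - 0) & 0x1f) = 1 < 2 and the function returns true: a forward copy
// would overwrite q1 before it is read, so copyPhysRegTuple below walks the
// sub-registers in reverse. Copying {q0,q1} from {q1,q2} instead gives
// ((0 - 1) & 0x1f) = 31, which is not < 2, and forward order is safe.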
2436 
2437 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2438  MachineBasicBlock::iterator I,
2439  const DebugLoc &DL, unsigned DestReg,
2440  unsigned SrcReg, bool KillSrc,
2441  unsigned Opcode,
2442  ArrayRef<unsigned> Indices) const {
2443  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2444  const TargetRegisterInfo *TRI = &getRegisterInfo();
2445  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2446  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2447  unsigned NumRegs = Indices.size();
2448 
2449  int SubReg = 0, End = NumRegs, Incr = 1;
2450  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2451  SubReg = NumRegs - 1;
2452  End = -1;
2453  Incr = -1;
2454  }
2455 
2456  for (; SubReg != End; SubReg += Incr) {
2457  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2458  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2459  AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2460  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2461  }
2462 }
2463 
2464 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2465  MachineBasicBlock::iterator I,
2466  const DebugLoc &DL, unsigned DestReg,
2467  unsigned SrcReg, bool KillSrc) const {
2468  if (AArch64::GPR32spRegClass.contains(DestReg) &&
2469  (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2470  const TargetRegisterInfo *TRI = &getRegisterInfo();
2471 
2472  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2473  // If either operand is WSP, expand to ADD #0.
2474  if (Subtarget.hasZeroCycleRegMove()) {
2475  // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2476  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2477  &AArch64::GPR64spRegClass);
2478  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2479  &AArch64::GPR64spRegClass);
2480  // This instruction is reading and writing X registers. This may upset
2481  // the register scavenger and machine verifier, so we need to indicate
2482  // that we are reading an undefined value from SrcRegX, but a proper
2483  // value from SrcReg.
2484  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2485  .addReg(SrcRegX, RegState::Undef)
2486  .addImm(0)
2487  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2488  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2489  } else {
2490  BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2491  .addReg(SrcReg, getKillRegState(KillSrc))
2492  .addImm(0)
2493  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2494  }
2495  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2496  BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2497  .addImm(0)
2498  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2499  } else {
2500  if (Subtarget.hasZeroCycleRegMove()) {
2501  // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2502  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2503  &AArch64::GPR64spRegClass);
2504  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2505  &AArch64::GPR64spRegClass);
2506  // This instruction is reading and writing X registers. This may upset
2507  // the register scavenger and machine verifier, so we need to indicate
2508  // that we are reading an undefined value from SrcRegX, but a proper
2509  // value from SrcReg.
2510  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2511  .addReg(AArch64::XZR)
2512  .addReg(SrcRegX, RegState::Undef)
2513  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2514  } else {
2515  // Otherwise, expand to ORR WZR.
2516  BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2517  .addReg(AArch64::WZR)
2518  .addReg(SrcReg, getKillRegState(KillSrc));
2519  }
2520  }
2521  return;
2522  }
2523 
2524  if (AArch64::GPR64spRegClass.contains(DestReg) &&
2525  (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2526  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2527  // If either operand is SP, expand to ADD #0.
2528  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2529  .addReg(SrcReg, getKillRegState(KillSrc))
2530  .addImm(0)
2531  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2532  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2533  BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2534  .addImm(0)
2535  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2536  } else {
2537  // Otherwise, expand to ORR XZR.
2538  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2539  .addReg(AArch64::XZR)
2540  .addReg(SrcReg, getKillRegState(KillSrc));
2541  }
2542  return;
2543  }
2544 
2545  // Copy a DDDD register quad by copying the individual sub-registers.
2546  if (AArch64::DDDDRegClass.contains(DestReg) &&
2547  AArch64::DDDDRegClass.contains(SrcReg)) {
2548  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2549  AArch64::dsub2, AArch64::dsub3};
2550  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2551  Indices);
2552  return;
2553  }
2554 
2555  // Copy a DDD register triple by copying the individual sub-registers.
2556  if (AArch64::DDDRegClass.contains(DestReg) &&
2557  AArch64::DDDRegClass.contains(SrcReg)) {
2558  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2559  AArch64::dsub2};
2560  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2561  Indices);
2562  return;
2563  }
2564 
2565  // Copy a DD register pair by copying the individual sub-registers.
2566  if (AArch64::DDRegClass.contains(DestReg) &&
2567  AArch64::DDRegClass.contains(SrcReg)) {
2568  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2569  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2570  Indices);
2571  return;
2572  }
2573 
2574  // Copy a QQQQ register quad by copying the individual sub-registers.
2575  if (AArch64::QQQQRegClass.contains(DestReg) &&
2576  AArch64::QQQQRegClass.contains(SrcReg)) {
2577  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2578  AArch64::qsub2, AArch64::qsub3};
2579  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2580  Indices);
2581  return;
2582  }
2583 
2584  // Copy a QQQ register triple by copying the individual sub-registers.
2585  if (AArch64::QQQRegClass.contains(DestReg) &&
2586  AArch64::QQQRegClass.contains(SrcReg)) {
2587  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2588  AArch64::qsub2};
2589  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2590  Indices);
2591  return;
2592  }
2593 
2594  // Copy a QQ register pair by copying the individual sub-registers.
2595  if (AArch64::QQRegClass.contains(DestReg) &&
2596  AArch64::QQRegClass.contains(SrcReg)) {
2597  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2598  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2599  Indices);
2600  return;
2601  }
2602 
2603  if (AArch64::FPR128RegClass.contains(DestReg) &&
2604  AArch64::FPR128RegClass.contains(SrcReg)) {
2605  if (Subtarget.hasNEON()) {
2606  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2607  .addReg(SrcReg)
2608  .addReg(SrcReg, getKillRegState(KillSrc));
2609  } else {
2610  BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2611  .addReg(AArch64::SP, RegState::Define)
2612  .addReg(SrcReg, getKillRegState(KillSrc))
2613  .addReg(AArch64::SP)
2614  .addImm(-16);
2615  BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2616  .addReg(AArch64::SP, RegState::Define)
2617  .addReg(DestReg, RegState::Define)
2618  .addReg(AArch64::SP)
2619  .addImm(16);
2620  }
2621  return;
2622  }
2623 
2624  if (AArch64::FPR64RegClass.contains(DestReg) &&
2625  AArch64::FPR64RegClass.contains(SrcReg)) {
2626  if (Subtarget.hasNEON()) {
2627  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2628  &AArch64::FPR128RegClass);
2629  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2630  &AArch64::FPR128RegClass);
2631  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2632  .addReg(SrcReg)
2633  .addReg(SrcReg, getKillRegState(KillSrc));
2634  } else {
2635  BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2636  .addReg(SrcReg, getKillRegState(KillSrc));
2637  }
2638  return;
2639  }
2640 
2641  if (AArch64::FPR32RegClass.contains(DestReg) &&
2642  AArch64::FPR32RegClass.contains(SrcReg)) {
2643  if (Subtarget.hasNEON()) {
2644  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2645  &AArch64::FPR128RegClass);
2646  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2647  &AArch64::FPR128RegClass);
2648  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2649  .addReg(SrcReg)
2650  .addReg(SrcReg, getKillRegState(KillSrc));
2651  } else {
2652  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2653  .addReg(SrcReg, getKillRegState(KillSrc));
2654  }
2655  return;
2656  }
2657 
2658  if (AArch64::FPR16RegClass.contains(DestReg) &&
2659  AArch64::FPR16RegClass.contains(SrcReg)) {
2660  if (Subtarget.hasNEON()) {
2661  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2662  &AArch64::FPR128RegClass);
2663  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2664  &AArch64::FPR128RegClass);
2665  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2666  .addReg(SrcReg)
2667  .addReg(SrcReg, getKillRegState(KillSrc));
2668  } else {
2669  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2670  &AArch64::FPR32RegClass);
2671  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2672  &AArch64::FPR32RegClass);
2673  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2674  .addReg(SrcReg, getKillRegState(KillSrc));
2675  }
2676  return;
2677  }
2678 
2679  if (AArch64::FPR8RegClass.contains(DestReg) &&
2680  AArch64::FPR8RegClass.contains(SrcReg)) {
2681  if (Subtarget.hasNEON()) {
2682  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2683  &AArch64::FPR128RegClass);
2684  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2685  &AArch64::FPR128RegClass);
2686  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2687  .addReg(SrcReg)
2688  .addReg(SrcReg, getKillRegState(KillSrc));
2689  } else {
2690  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2691  &AArch64::FPR32RegClass);
2692  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2693  &AArch64::FPR32RegClass);
2694  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2695  .addReg(SrcReg, getKillRegState(KillSrc));
2696  }
2697  return;
2698  }
2699 
2700  // Copies between GPR64 and FPR64.
2701  if (AArch64::FPR64RegClass.contains(DestReg) &&
2702  AArch64::GPR64RegClass.contains(SrcReg)) {
2703  BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2704  .addReg(SrcReg, getKillRegState(KillSrc));
2705  return;
2706  }
2707  if (AArch64::GPR64RegClass.contains(DestReg) &&
2708  AArch64::FPR64RegClass.contains(SrcReg)) {
2709  BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2710  .addReg(SrcReg, getKillRegState(KillSrc));
2711  return;
2712  }
2713  // Copies between GPR32 and FPR32.
2714  if (AArch64::FPR32RegClass.contains(DestReg) &&
2715  AArch64::GPR32RegClass.contains(SrcReg)) {
2716  BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2717  .addReg(SrcReg, getKillRegState(KillSrc));
2718  return;
2719  }
2720  if (AArch64::GPR32RegClass.contains(DestReg) &&
2721  AArch64::FPR32RegClass.contains(SrcReg)) {
2722  BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2723  .addReg(SrcReg, getKillRegState(KillSrc));
2724  return;
2725  }
2726 
2727  if (DestReg == AArch64::NZCV) {
2728  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2729  BuildMI(MBB, I, DL, get(AArch64::MSR))
2730  .addImm(AArch64SysReg::NZCV)
2731  .addReg(SrcReg, getKillRegState(KillSrc))
2732  .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2733  return;
2734  }
2735 
2736  if (SrcReg == AArch64::NZCV) {
2737  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2738  BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2739  .addImm(AArch64SysReg::NZCV)
2740  .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2741  return;
2742  }
2743 
2744  llvm_unreachable("unimplemented reg-to-reg copy");
2745 }
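// Example (sketch): on a subtarget without zero-cycle register moves, a plain
// GPR32-to-GPR32 copy such as "$w0 = COPY killed $w1" is expanded by the code
// above into
//
//   $w0 = ORRWrr $wzr, killed $w1
//
// whereas with Subtarget.hasZeroCycleRegMove() it becomes an ORRXrr on the
// containing X registers, with an implicit read of the real 32-bit source so
// the verifier still sees a well-defined value.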
2746 
2747 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2748  MachineBasicBlock &MBB,
2749  MachineBasicBlock::iterator InsertBefore,
2750  const MCInstrDesc &MCID,
2751  unsigned SrcReg, bool IsKill,
2752  unsigned SubIdx0, unsigned SubIdx1, int FI,
2753  MachineMemOperand *MMO) {
2754  unsigned SrcReg0 = SrcReg;
2755  unsigned SrcReg1 = SrcReg;
2756  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2757  SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2758  SubIdx0 = 0;
2759  SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2760  SubIdx1 = 0;
2761  }
2762  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2763  .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
2764  .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
2765  .addFrameIndex(FI)
2766  .addImm(0)
2767  .addMemOperand(MMO);
2768 }
2769 
2770 void AArch64InstrInfo::storeRegToStackSlot(
2771  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2772  bool isKill, int FI, const TargetRegisterClass *RC,
2773  const TargetRegisterInfo *TRI) const {
2774  MachineFunction &MF = *MBB.getParent();
2775  MachineFrameInfo &MFI = MF.getFrameInfo();
2776  unsigned Align = MFI.getObjectAlignment(FI);
2777 
2778  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2779  MachineMemOperand *MMO = MF.getMachineMemOperand(
2780  PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2781  unsigned Opc = 0;
2782  bool Offset = true;
2783  switch (TRI->getSpillSize(*RC)) {
2784  case 1:
2785  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2786  Opc = AArch64::STRBui;
2787  break;
2788  case 2:
2789  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2790  Opc = AArch64::STRHui;
2791  break;
2792  case 4:
2793  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2794  Opc = AArch64::STRWui;
2795  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2796  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2797  else
2798  assert(SrcReg != AArch64::WSP);
2799  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2800  Opc = AArch64::STRSui;
2801  break;
2802  case 8:
2803  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2804  Opc = AArch64::STRXui;
2805  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2806  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2807  else
2808  assert(SrcReg != AArch64::SP);
2809  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2810  Opc = AArch64::STRDui;
2811  } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2812  storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2813  get(AArch64::STPWi), SrcReg, isKill,
2814  AArch64::sube32, AArch64::subo32, FI, MMO);
2815  return;
2816  }
2817  break;
2818  case 16:
2819  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2820  Opc = AArch64::STRQui;
2821  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2822  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2823  Opc = AArch64::ST1Twov1d;
2824  Offset = false;
2825  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2826  storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2827  get(AArch64::STPXi), SrcReg, isKill,
2828  AArch64::sube64, AArch64::subo64, FI, MMO);
2829  return;
2830  }
2831  break;
2832  case 24:
2833  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2834  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2835  Opc = AArch64::ST1Threev1d;
2836  Offset = false;
2837  }
2838  break;
2839  case 32:
2840  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2841  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2842  Opc = AArch64::ST1Fourv1d;
2843  Offset = false;
2844  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2845  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2846  Opc = AArch64::ST1Twov2d;
2847  Offset = false;
2848  }
2849  break;
2850  case 48:
2851  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2852  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2853  Opc = AArch64::ST1Threev2d;
2854  Offset = false;
2855  }
2856  break;
2857  case 64:
2858  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2859  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2860  Opc = AArch64::ST1Fourv2d;
2861  Offset = false;
2862  }
2863  break;
2864  }
2865  assert(Opc && "Unknown register class");
2866 
2867  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2868  .addReg(SrcReg, getKillRegState(isKill))
2869  .addFrameIndex(FI);
2870 
2871  if (Offset)
2872  MI.addImm(0);
2873  MI.addMemOperand(MMO);
2874 }
2875 
2876 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
2877  MachineBasicBlock &MBB,
2878  MachineBasicBlock::iterator InsertBefore,
2879  const MCInstrDesc &MCID,
2880  unsigned DestReg, unsigned SubIdx0,
2881  unsigned SubIdx1, int FI,
2882  MachineMemOperand *MMO) {
2883  unsigned DestReg0 = DestReg;
2884  unsigned DestReg1 = DestReg;
2885  bool IsUndef = true;
2886  if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
2887  DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
2888  SubIdx0 = 0;
2889  DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
2890  SubIdx1 = 0;
2891  IsUndef = false;
2892  }
2893  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2894  .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
2895  .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
2896  .addFrameIndex(FI)
2897  .addImm(0)
2898  .addMemOperand(MMO);
2899 }
2900 
2901 void AArch64InstrInfo::loadRegFromStackSlot(
2902  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2903  int FI, const TargetRegisterClass *RC,
2904  const TargetRegisterInfo *TRI) const {
2905  MachineFunction &MF = *MBB.getParent();
2906  MachineFrameInfo &MFI = MF.getFrameInfo();
2907  unsigned Align = MFI.getObjectAlignment(FI);
2908  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2909  MachineMemOperand *MMO = MF.getMachineMemOperand(
2910  PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2911 
2912  unsigned Opc = 0;
2913  bool Offset = true;
2914  switch (TRI->getSpillSize(*RC)) {
2915  case 1:
2916  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2917  Opc = AArch64::LDRBui;
2918  break;
2919  case 2:
2920  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2921  Opc = AArch64::LDRHui;
2922  break;
2923  case 4:
2924  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2925  Opc = AArch64::LDRWui;
2926  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2927  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2928  else
2929  assert(DestReg != AArch64::WSP);
2930  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2931  Opc = AArch64::LDRSui;
2932  break;
2933  case 8:
2934  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2935  Opc = AArch64::LDRXui;
2936  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2937  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2938  else
2939  assert(DestReg != AArch64::SP);
2940  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2941  Opc = AArch64::LDRDui;
2942  } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2943  loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2944  get(AArch64::LDPWi), DestReg, AArch64::sube32,
2945  AArch64::subo32, FI, MMO);
2946  return;
2947  }
2948  break;
2949  case 16:
2950  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2951  Opc = AArch64::LDRQui;
2952  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2953  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2954  Opc = AArch64::LD1Twov1d;
2955  Offset = false;
2956  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2957  loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2958  get(AArch64::LDPXi), DestReg, AArch64::sube64,
2959  AArch64::subo64, FI, MMO);
2960  return;
2961  }
2962  break;
2963  case 24:
2964  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2965  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2966  Opc = AArch64::LD1Threev1d;
2967  Offset = false;
2968  }
2969  break;
2970  case 32:
2971  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2972  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2973  Opc = AArch64::LD1Fourv1d;
2974  Offset = false;
2975  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2976  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2977  Opc = AArch64::LD1Twov2d;
2978  Offset = false;
2979  }
2980  break;
2981  case 48:
2982  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2983  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2984  Opc = AArch64::LD1Threev2d;
2985  Offset = false;
2986  }
2987  break;
2988  case 64:
2989  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2990  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2991  Opc = AArch64::LD1Fourv2d;
2992  Offset = false;
2993  }
2994  break;
2995  }
2996  assert(Opc && "Unknown register class");
2997 
2998  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2999  .addReg(DestReg, getDefRegState(true))
3000  .addFrameIndex(FI);
3001  if (Offset)
3002  MI.addImm(0);
3003  MI.addMemOperand(MMO);
3004 }
3005 
3006 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
3007  MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
3008  unsigned DestReg, unsigned SrcReg, int Offset,
3009  const TargetInstrInfo *TII,
3010  MachineInstr::MIFlag Flag, bool SetNZCV) {
3011  if (DestReg == SrcReg && Offset == 0)
3012  return;
3013 
3014  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
3015  "SP increment/decrement not 16-byte aligned");
3016 
3017  bool isSub = Offset < 0;
3018  if (isSub)
3019  Offset = -Offset;
3020 
3021  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
3022  // scratch register. If DestReg is a virtual register, use it as the
3023  // scratch register; otherwise, create a new virtual register (to be
3024  // replaced by the scavenger at the end of PEI). That case can be optimized
3025  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
3026  // register can be loaded with offset%8 and the add/sub can use an extending
3027  // instruction with LSL#3.
3028  // Currently the function handles any offsets but generates a poor sequence
3029  // of code.
3030  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
3031 
3032  unsigned Opc;
3033  if (SetNZCV)
3034  Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
3035  else
3036  Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
3037  const unsigned MaxEncoding = 0xfff;
3038  const unsigned ShiftSize = 12;
3039  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
3040  while (((unsigned)Offset) >= (1 << ShiftSize)) {
3041  unsigned ThisVal;
3042  if (((unsigned)Offset) > MaxEncodableValue) {
3043  ThisVal = MaxEncodableValue;
3044  } else {
3045  ThisVal = Offset & MaxEncodableValue;
3046  }
3047  assert((ThisVal >> ShiftSize) <= MaxEncoding &&
3048  "Encoding cannot handle value that big");
3049  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3050  .addReg(SrcReg)
3051  .addImm(ThisVal >> ShiftSize)
3052  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
3053  .setMIFlag(Flag);
3054 
3055  SrcReg = DestReg;
3056  Offset -= ThisVal;
3057  if (Offset == 0)
3058  return;
3059  }
3060  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3061  .addReg(SrcReg)
3062  .addImm(Offset)
3063  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
3064  .setMIFlag(Flag);
3065 }
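// Worked example (illustrative): a request of Offset = 0x12345 is split into
// 12-bit chunks by the loop above, producing
//
//   ADD Xd, Xn, #0x12, lsl #12
//   ADD Xd, Xd, #0x345
//
// i.e. the high chunk uses the shifted-immediate form and the remainder is
// then added (or subtracted, for negative offsets) with a final plain ADD/SUB.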
3066 
3067 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3068  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3069  MachineBasicBlock::iterator InsertPt, int FrameIndex,
3070  LiveIntervals *LIS) const {
3071  // This is a bit of a hack. Consider this instruction:
3072  //
3073  // %0 = COPY %sp; GPR64all:%0
3074  //
3075  // We explicitly chose GPR64all for the virtual register so such a copy might
3076  // be eliminated by RegisterCoalescer. However, that may not be possible, and
3077  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3078  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3079  //
3080  // To prevent that, we are going to constrain the %0 register class here.
3081  //
3082  // <rdar://problem/11522048>
3083  //
3084  if (MI.isFullCopy()) {
3085  unsigned DstReg = MI.getOperand(0).getReg();
3086  unsigned SrcReg = MI.getOperand(1).getReg();
3087  if (SrcReg == AArch64::SP &&
3088  TargetRegisterInfo::isVirtualRegister(DstReg)) {
3089  MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3090  return nullptr;
3091  }
3092  if (DstReg == AArch64::SP &&
3093  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
3094  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3095  return nullptr;
3096  }
3097  }
3098 
3099  // Handle the case where a copy is being spilled or filled but the source
3100  // and destination register class don't match. For example:
3101  //
3102  // %0 = COPY %xzr; GPR64common:%0
3103  //
3104  // In this case we can still safely fold away the COPY and generate the
3105  // following spill code:
3106  //
3107  // STRXui %xzr, %stack.0
3108  //
3109  // This also eliminates spilled cross register class COPYs (e.g. between x and
3110  // d regs) of the same size. For example:
3111  //
3112  // %0 = COPY %1; GPR64:%0, FPR64:%1
3113  //
3114  // will be filled as
3115  //
3116  // LDRDui %0, fi<#0>
3117  //
3118  // instead of
3119  //
3120  // LDRXui %Temp, fi<#0>
3121  // %0 = FMOV %Temp
3122  //
3123  if (MI.isCopy() && Ops.size() == 1 &&
3124  // Make sure we're only folding the explicit COPY defs/uses.
3125  (Ops[0] == 0 || Ops[0] == 1)) {
3126  bool IsSpill = Ops[0] == 0;
3127  bool IsFill = !IsSpill;
3128  const TargetRegisterInfo &TRI = getRegisterInfo();
3129  const MachineRegisterInfo &MRI = MF.getRegInfo();
3130  MachineBasicBlock &MBB = *MI.getParent();
3131  const MachineOperand &DstMO = MI.getOperand(0);
3132  const MachineOperand &SrcMO = MI.getOperand(1);
3133  unsigned DstReg = DstMO.getReg();
3134  unsigned SrcReg = SrcMO.getReg();
3135  // This is slightly expensive to compute for physical regs since
3136  // getMinimalPhysRegClass is slow.
3137  auto getRegClass = [&](unsigned Reg) {
3138  return TargetRegisterInfo::isVirtualRegister(Reg)
3139  ? MRI.getRegClass(Reg)
3140  : TRI.getMinimalPhysRegClass(Reg);
3141  };
3142 
3143  if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3144  assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3145  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3146  "Mismatched register size in non subreg COPY");
3147  if (IsSpill)
3148  storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3149  getRegClass(SrcReg), &TRI);
3150  else
3151  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3152  getRegClass(DstReg), &TRI);
3153  return &*--InsertPt;
3154  }
3155 
3156  // Handle cases like spilling def of:
3157  //
3158  // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3159  //
3160  // where the physical register source can be widened and stored to the full
3161  // virtual reg destination stack slot, in this case producing:
3162  //
3163  // STRXui %xzr, %stack.0
3164  //
3165  if (IsSpill && DstMO.isUndef() &&
3166  TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
3167  assert(SrcMO.getSubReg() == 0 &&
3168  "Unexpected subreg on physical register");
3169  const TargetRegisterClass *SpillRC;
3170  unsigned SpillSubreg;
3171  switch (DstMO.getSubReg()) {
3172  default:
3173  SpillRC = nullptr;
3174  break;
3175  case AArch64::sub_32:
3176  case AArch64::ssub:
3177  if (AArch64::GPR32RegClass.contains(SrcReg)) {
3178  SpillRC = &AArch64::GPR64RegClass;
3179  SpillSubreg = AArch64::sub_32;
3180  } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3181  SpillRC = &AArch64::FPR64RegClass;
3182  SpillSubreg = AArch64::ssub;
3183  } else
3184  SpillRC = nullptr;
3185  break;
3186  case AArch64::dsub:
3187  if (AArch64::FPR64RegClass.contains(SrcReg)) {
3188  SpillRC = &AArch64::FPR128RegClass;
3189  SpillSubreg = AArch64::dsub;
3190  } else
3191  SpillRC = nullptr;
3192  break;
3193  }
3194 
3195  if (SpillRC)
3196  if (unsigned WidenedSrcReg =
3197  TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3198  storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3199  FrameIndex, SpillRC, &TRI);
3200  return &*--InsertPt;
3201  }
3202  }
3203 
3204  // Handle cases like filling use of:
3205  //
3206  // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3207  //
3208  // where we can load the full virtual reg source stack slot, into the subreg
3209  // destination, in this case producing:
3210  //
3211  // LDRWui %0:sub_32<def,read-undef>, %stack.0
3212  //
3213  if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3214  const TargetRegisterClass *FillRC;
3215  switch (DstMO.getSubReg()) {
3216  default:
3217  FillRC = nullptr;
3218  break;
3219  case AArch64::sub_32:
3220  FillRC = &AArch64::GPR32RegClass;
3221  break;
3222  case AArch64::ssub:
3223  FillRC = &AArch64::FPR32RegClass;
3224  break;
3225  case AArch64::dsub:
3226  FillRC = &AArch64::FPR64RegClass;
3227  break;
3228  }
3229 
3230  if (FillRC) {
3231  assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3232  TRI.getRegSizeInBits(*FillRC) &&
3233  "Mismatched regclass size on folded subreg COPY");
3234  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3235  MachineInstr &LoadMI = *--InsertPt;
3236  MachineOperand &LoadDst = LoadMI.getOperand(0);
3237  assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3238  LoadDst.setSubReg(DstMO.getSubReg());
3239  LoadDst.setIsUndef();
3240  return &LoadMI;
3241  }
3242  }
3243  }
3244 
3245  // Cannot fold.
3246  return nullptr;
3247 }
3248 
3249 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
3250  bool *OutUseUnscaledOp,
3251  unsigned *OutUnscaledOp,
3252  int *EmittableOffset) {
3253  int Scale = 1;
3254  bool IsSigned = false;
3255  // The ImmIdx should be changed case by case if it is not 2.
3256  unsigned ImmIdx = 2;
3257  unsigned UnscaledOp = 0;
3258  // Set output values in case of early exit.
3259  if (EmittableOffset)
3260  *EmittableOffset = 0;
3261  if (OutUseUnscaledOp)
3262  *OutUseUnscaledOp = false;
3263  if (OutUnscaledOp)
3264  *OutUnscaledOp = 0;
3265  switch (MI.getOpcode()) {
3266  default:
3267  llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
3268  // Vector spills/fills can't take an immediate offset.
3269  case AArch64::LD1Twov2d:
3270  case AArch64::LD1Threev2d:
3271  case AArch64::LD1Fourv2d:
3272  case AArch64::LD1Twov1d:
3273  case AArch64::LD1Threev1d:
3274  case AArch64::LD1Fourv1d:
3275  case AArch64::ST1Twov2d:
3276  case AArch64::ST1Threev2d:
3277  case AArch64::ST1Fourv2d:
3278  case AArch64::ST1Twov1d:
3279  case AArch64::ST1Threev1d:
3280  case AArch64::ST1Fourv1d:
3281  return AArch64FrameOffsetCannotUpdate;
3282  case AArch64::PRFMui:
3283  Scale = 8;
3284  UnscaledOp = AArch64::PRFUMi;
3285  break;
3286  case AArch64::LDRXui:
3287  Scale = 8;
3288  UnscaledOp = AArch64::LDURXi;
3289  break;
3290  case AArch64::LDRWui:
3291  Scale = 4;
3292  UnscaledOp = AArch64::LDURWi;
3293  break;
3294  case AArch64::LDRBui:
3295  Scale = 1;
3296  UnscaledOp = AArch64::LDURBi;
3297  break;
3298  case AArch64::LDRHui:
3299  Scale = 2;
3300  UnscaledOp = AArch64::LDURHi;
3301  break;
3302  case AArch64::LDRSui:
3303  Scale = 4;
3304  UnscaledOp = AArch64::LDURSi;
3305  break;
3306  case AArch64::LDRDui:
3307  Scale = 8;
3308  UnscaledOp = AArch64::LDURDi;
3309  break;
3310  case AArch64::LDRQui:
3311  Scale = 16;
3312  UnscaledOp = AArch64::LDURQi;
3313  break;
3314  case AArch64::LDRBBui:
3315  Scale = 1;
3316  UnscaledOp = AArch64::LDURBBi;
3317  break;
3318  case AArch64::LDRHHui:
3319  Scale = 2;
3320  UnscaledOp = AArch64::LDURHHi;
3321  break;
3322  case AArch64::LDRSBXui:
3323  Scale = 1;
3324  UnscaledOp = AArch64::LDURSBXi;
3325  break;
3326  case AArch64::LDRSBWui:
3327  Scale = 1;
3328  UnscaledOp = AArch64::LDURSBWi;
3329  break;
3330  case AArch64::LDRSHXui:
3331  Scale = 2;
3332  UnscaledOp = AArch64::LDURSHXi;
3333  break;
3334  case AArch64::LDRSHWui:
3335  Scale = 2;
3336  UnscaledOp = AArch64::LDURSHWi;
3337  break;
3338  case AArch64::LDRSWui:
3339  Scale = 4;
3340  UnscaledOp = AArch64::LDURSWi;
3341  break;
3342 
3343  case AArch64::STRXui:
3344  Scale = 8;
3345  UnscaledOp = AArch64::STURXi;
3346  break;
3347  case AArch64::STRWui:
3348  Scale = 4;
3349  UnscaledOp = AArch64::STURWi;
3350  break;
3351  case AArch64::STRBui:
3352  Scale = 1;
3353  UnscaledOp = AArch64::STURBi;
3354  break;
3355  case AArch64::STRHui:
3356  Scale = 2;
3357  UnscaledOp = AArch64::STURHi;
3358  break;
3359  case AArch64::STRSui:
3360  Scale = 4;
3361  UnscaledOp = AArch64::STURSi;
3362  break;
3363  case AArch64::STRDui:
3364  Scale = 8;
3365  UnscaledOp = AArch64::STURDi;
3366  break;
3367  case AArch64::STRQui:
3368  Scale = 16;
3369  UnscaledOp = AArch64::STURQi;
3370  break;
3371  case AArch64::STRBBui:
3372  Scale = 1;
3373  UnscaledOp = AArch64::STURBBi;
3374  break;
3375  case AArch64::STRHHui:
3376  Scale = 2;
3377  UnscaledOp = AArch64::STURHHi;
3378  break;
3379 
3380  case AArch64::LDPXi:
3381  case AArch64::LDPDi:
3382  case AArch64::STPXi:
3383  case AArch64::STPDi:
3384  case AArch64::LDNPXi:
3385  case AArch64::LDNPDi:
3386  case AArch64::STNPXi:
3387  case AArch64::STNPDi:
3388  ImmIdx = 3;
3389  IsSigned = true;
3390  Scale = 8;
3391  break;
3392  case AArch64::LDPQi:
3393  case AArch64::STPQi:
3394  case AArch64::LDNPQi:
3395  case AArch64::STNPQi:
3396  ImmIdx = 3;
3397  IsSigned = true;
3398  Scale = 16;
3399  break;
3400  case AArch64::LDPWi:
3401  case AArch64::LDPSi:
3402  case AArch64::STPWi:
3403  case AArch64::STPSi:
3404  case AArch64::LDNPWi:
3405  case AArch64::LDNPSi:
3406  case AArch64::STNPWi:
3407  case AArch64::STNPSi:
3408  ImmIdx = 3;
3409  IsSigned = true;
3410  Scale = 4;
3411  break;
3412 
3413  case AArch64::LDURXi:
3414  case AArch64::LDURWi:
3415  case AArch64::LDURBi:
3416  case AArch64::LDURHi:
3417  case AArch64::LDURSi:
3418  case AArch64::LDURDi:
3419  case AArch64::LDURQi:
3420  case AArch64::LDURHHi:
3421  case AArch64::LDURBBi:
3422  case AArch64::LDURSBXi:
3423  case AArch64::LDURSBWi:
3424  case AArch64::LDURSHXi:
3425  case AArch64::LDURSHWi:
3426  case AArch64::LDURSWi:
3427  case AArch64::STURXi:
3428  case AArch64::STURWi:
3429  case AArch64::STURBi:
3430  case AArch64::STURHi:
3431  case AArch64::STURSi:
3432  case AArch64::STURDi:
3433  case AArch64::STURQi:
3434  case AArch64::STURBBi:
3435  case AArch64::STURHHi:
3436  Scale = 1;
3437  break;
3438  }
3439 
3440  Offset += MI.getOperand(ImmIdx).getImm() * Scale;
3441 
3442  bool useUnscaledOp = false;
3443  // If the offset doesn't match the scale, we rewrite the instruction to
3444  // use the unscaled instruction instead. Likewise, if we have a negative
3445  // offset (and have an unscaled op to use).
3446  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
3447  useUnscaledOp = true;
3448 
3449  // Use an unscaled addressing mode if the instruction has a negative offset
3450  // (or if the instruction is already using an unscaled addressing mode).
3451  unsigned MaskBits;
3452  if (IsSigned) {
3453  // ldp/stp instructions.
3454  MaskBits = 7;
3455  Offset /= Scale;
3456  } else if (UnscaledOp == 0 || useUnscaledOp) {
3457  MaskBits = 9;
3458  IsSigned = true;
3459  Scale = 1;
3460  } else {
3461  MaskBits = 12;
3462  IsSigned = false;
3463  Offset /= Scale;
3464  }
3465 
3466  // Attempt to fold address computation.
3467  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
3468  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
3469  if (Offset >= MinOff && Offset <= MaxOff) {
3470  if (EmittableOffset)
3471  *EmittableOffset = Offset;
3472  Offset = 0;
3473  } else {
3474  int NewOff = Offset < 0 ? MinOff : MaxOff;
3475  if (EmittableOffset)
3476  *EmittableOffset = NewOff;
3477  Offset = (Offset - NewOff) * Scale;
3478  }
3479  if (OutUseUnscaledOp)
3480  *OutUseUnscaledOp = useUnscaledOp;
3481  if (OutUnscaledOp)
3482  *OutUnscaledOp = UnscaledOp;
3483  return AArch64FrameOffsetCanUpdate |
3484  (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3485 }
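// Worked example (illustrative): an AArch64::LDRXui whose immediate is 0 and
// whose accumulated frame offset is -16 cannot stay in the scaled form (its
// immediate is unsigned), so useUnscaledOp becomes true, MaskBits = 9, and
// -16 falls in [-256, 255]. The function reports EmittableOffset = -16 with
// AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal, and
// rewriteAArch64FrameIndex below switches the opcode to AArch64::LDURXi.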
3486 
3487 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3488  unsigned FrameReg, int &Offset,
3489  const AArch64InstrInfo *TII) {
3490  unsigned Opcode = MI.getOpcode();
3491  unsigned ImmIdx = FrameRegIdx + 1;
3492 
3493  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3494  Offset += MI.getOperand(ImmIdx).getImm();
3495  emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3496  MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3497  MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3498  MI.eraseFromParent();
3499  Offset = 0;
3500  return true;
3501  }
3502 
3503  int NewOffset;
3504  unsigned UnscaledOp;
3505  bool UseUnscaledOp;
3506  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3507  &UnscaledOp, &NewOffset);
3508  if (Status & AArch64FrameOffsetCanUpdate) {
3509  if (Status & AArch64FrameOffsetIsLegal)
3510  // Replace the FrameIndex with FrameReg.
3511  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3512  if (UseUnscaledOp)
3513  MI.setDesc(TII->get(UnscaledOp));
3514 
3515  MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3516  return Offset == 0;
3517  }
3518 
3519  return false;
3520 }
3521 
3522 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3523  NopInst.setOpcode(AArch64::HINT);
3524  NopInst.addOperand(MCOperand::createImm(0));
3525 }
3526 
3527 // AArch64 supports MachineCombiner.
3528 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3529 
3530 // True when Opc sets flags
3531 static bool isCombineInstrSettingFlag(unsigned Opc) {
3532  switch (Opc) {
3533  case AArch64::ADDSWrr:
3534  case AArch64::ADDSWri:
3535  case AArch64::ADDSXrr:
3536  case AArch64::ADDSXri:
3537  case AArch64::SUBSWrr:
3538  case AArch64::SUBSXrr:
3539  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3540  case AArch64::SUBSWri:
3541  case AArch64::SUBSXri:
3542  return true;
3543  default:
3544  break;
3545  }
3546  return false;
3547 }
3548 
3549 // 32b Opcodes that can be combined with a MUL
3550 static bool isCombineInstrCandidate32(unsigned Opc) {
3551  switch (Opc) {
3552  case AArch64::ADDWrr:
3553  case AArch64::ADDWri:
3554  case AArch64::SUBWrr:
3555  case AArch64::ADDSWrr:
3556  case AArch64::ADDSWri:
3557  case AArch64::SUBSWrr:
3558  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3559  case AArch64::SUBWri:
3560  case AArch64::SUBSWri:
3561  return true;
3562  default:
3563  break;
3564  }
3565  return false;
3566 }
3567 
3568 // 64b Opcodes that can be combined with a MUL
3569 static bool isCombineInstrCandidate64(unsigned Opc) {
3570  switch (Opc) {
3571  case AArch64::ADDXrr:
3572  case AArch64::ADDXri:
3573  case AArch64::SUBXrr:
3574  case AArch64::ADDSXrr:
3575  case AArch64::ADDSXri:
3576  case AArch64::SUBSXrr:
3577  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3578  case AArch64::SUBXri:
3579  case AArch64::SUBSXri:
3580  return true;
3581  default:
3582  break;
3583  }
3584  return false;
3585 }
3586 
3587 // FP Opcodes that can be combined with a FMUL
3588 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3589  switch (Inst.getOpcode()) {
3590  default:
3591  break;
3592  case AArch64::FADDSrr:
3593  case AArch64::FADDDrr:
3594  case AArch64::FADDv2f32:
3595  case AArch64::FADDv2f64:
3596  case AArch64::FADDv4f32:
3597  case AArch64::FSUBSrr:
3598  case AArch64::FSUBDrr:
3599  case AArch64::FSUBv2f32:
3600  case AArch64::FSUBv2f64:
3601  case AArch64::FSUBv4f32:
3602  TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3603  return (Options.UnsafeFPMath ||
3604  Options.AllowFPOpFusion == FPOpFusion::Fast);
3605  }
3606  return false;
3607 }
3608 
3609 // Opcodes that can be combined with a MUL
3610 static bool isCombineInstrCandidate(unsigned Opc) {
3611  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3612 }
3613 
3614 //
3615 // Utility routine that checks if \param MO is defined by an
3616 // \param CombineOpc instruction in the basic block \param MBB
3617 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3618  unsigned CombineOpc, unsigned ZeroReg = 0,
3619  bool CheckZeroReg = false) {
3620  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3621  MachineInstr *MI = nullptr;
3622 
3623  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3624  MI = MRI.getUniqueVRegDef(MO.getReg());
3625  // And it needs to be in the trace (otherwise, it won't have a depth).
3626  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3627  return false;
3628  // Must only be used by the user we combine with.
3629  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3630  return false;
3631 
3632  if (CheckZeroReg) {
3633  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3634  MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3635  MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3636  // The third input reg must be zero.
3637  if (MI->getOperand(3).getReg() != ZeroReg)
3638  return false;
3639  }
3640 
3641  return true;
3642 }
3643 
3644 //
3645 // Is \param MO defined by an integer multiply and can be combined?
3646 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3647  unsigned MulOpc, unsigned ZeroReg) {
3648  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3649 }
3650 
3651 //
3652 // Is \param MO defined by a floating-point multiply and can be combined?
3653 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3654  unsigned MulOpc) {
3655  return canCombine(MBB, MO, MulOpc);
3656 }
3657 
3658 // TODO: There are many more machine instruction opcodes to match:
3659 // 1. Other data types (integer, vectors)
3660 // 2. Other math / logic operations (xor, or)
3661 // 3. Other forms of the same operation (intrinsics and other variants)
3662 bool AArch64InstrInfo::isAssociativeAndCommutative(
3663  const MachineInstr &Inst) const {
3664  switch (Inst.getOpcode()) {
3665  case AArch64::FADDDrr:
3666  case AArch64::FADDSrr:
3667  case AArch64::FADDv2f32:
3668  case AArch64::FADDv2f64:
3669  case AArch64::FADDv4f32:
3670  case AArch64::FMULDrr:
3671  case AArch64::FMULSrr:
3672  case AArch64::FMULX32:
3673  case AArch64::FMULX64:
3674  case AArch64::FMULXv2f32:
3675  case AArch64::FMULXv2f64:
3676  case AArch64::FMULXv4f32:
3677  case AArch64::FMULv2f32:
3678  case AArch64::FMULv2f64:
3679  case AArch64::FMULv4f32:
3680  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3681  default:
3682  return false;
3683  }
3684 }
3685 
3686 /// Find instructions that can be turned into madd.
3687 static bool getMaddPatterns(MachineInstr &Root,
3688  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3689  unsigned Opc = Root.getOpcode();
3690  MachineBasicBlock &MBB = *Root.getParent();
3691  bool Found = false;
3692 
3693  if (!isCombineInstrCandidate(Opc))
3694  return false;
3695  if (isCombineInstrSettingFlag(Opc)) {
3696  int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3697  // When NZCV is live bail out.
3698  if (Cmp_NZCV == -1)
3699  return false;
3700  unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3701  // When opcode can't change bail out.
3702  // CHECKME: do we miss any cases for opcode conversion?
3703  if (NewOpc == Opc)
3704  return false;
3705  Opc = NewOpc;
3706  }
3707 
3708  switch (Opc) {
3709  default:
3710  break;
3711  case AArch64::ADDWrr:
3712  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3713  "ADDWrr does not have register operands");
3714  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3715  AArch64::WZR)) {
3717  Found = true;
3718  }
3719  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3720  AArch64::WZR)) {
3722  Found = true;
3723  }
3724  break;
3725  case AArch64::ADDXrr:
3726  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3727  AArch64::XZR)) {
3729  Found = true;
3730  }
3731  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3732  AArch64::XZR)) {
3734  Found = true;
3735  }
3736  break;
3737  case AArch64::SUBWrr:
3738  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3739  AArch64::WZR)) {
3741  Found = true;
3742  }
3743  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3744  AArch64::WZR)) {
3746  Found = true;
3747  }
3748  break;
3749  case AArch64::SUBXrr:
3750  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3751  AArch64::XZR)) {
3753  Found = true;
3754  }
3755  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3756  AArch64::XZR)) {
3758  Found = true;
3759  }
3760  break;
3761  case AArch64::ADDWri:
3762  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3763  AArch64::WZR)) {
3765  Found = true;
3766  }
3767  break;
3768  case AArch64::ADDXri:
3769  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3770  AArch64::XZR)) {
3772  Found = true;
3773  }
3774  break;
3775  case AArch64::SUBWri:
3776  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3777  AArch64::WZR)) {
3779  Found = true;
3780  }
3781  break;
3782  case AArch64::SUBXri:
3783  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3784  AArch64::XZR)) {
3786  Found = true;
3787  }
3788  break;
3789  }
3790  return Found;
3791 }
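// Minimal sketch of the MIR shape getMaddPatterns() looks for (virtual
// register numbers are illustrative only):
//   %3:gpr32 = MADDWrrr %0, %1, $wzr   ; a plain 32-bit MUL (madd with wzr)
//   %4:gpr32 = ADDWrr %3, %2           ; Root: the add of the mul result
// With %3 having a single non-debug use, operand 1 of the ADDWrr matches and
// a MULADDW_OP1-style pattern is recorded for the combiner to evaluate.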
3792 /// Floating-Point Support
3793 
3794 /// Find instructions that can be turned into madd.
3795 static bool getFMAPatterns(MachineInstr &Root,
3796  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3797 
3798  if (!isCombineInstrCandidateFP(Root))
3799  return false;
3800 
3801  MachineBasicBlock &MBB = *Root.getParent();
3802  bool Found = false;
3803 
3804  switch (Root.getOpcode()) {
3805  default:
3806  assert(false && "Unsupported FP instruction in combiner\n");
3807  break;
3808  case AArch64::FADDSrr:
3809  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3810  "FADDSrr does not have register operands");
3811  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3813  Found = true;
3814  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3815  AArch64::FMULv1i32_indexed)) {
3817  Found = true;
3818  }
3819  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3821  Found = true;
3822  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3823  AArch64::FMULv1i32_indexed)) {
3825  Found = true;
3826  }
3827  break;
3828  case AArch64::FADDDrr:
3829  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3831  Found = true;
3832  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3833  AArch64::FMULv1i64_indexed)) {
3835  Found = true;
3836  }
3837  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3839  Found = true;
3840  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3841  AArch64::FMULv1i64_indexed)) {
3843  Found = true;
3844  }
3845  break;
3846  case AArch64::FADDv2f32:
3847  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3848  AArch64::FMULv2i32_indexed)) {
3850  Found = true;
3851  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3852  AArch64::FMULv2f32)) {
3854  Found = true;
3855  }
3856  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3857  AArch64::FMULv2i32_indexed)) {
3859  Found = true;
3860  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3861  AArch64::FMULv2f32)) {
3863  Found = true;
3864  }
3865  break;
3866  case AArch64::FADDv2f64:
3867  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3868  AArch64::FMULv2i64_indexed)) {
3870  Found = true;
3871  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3872  AArch64::FMULv2f64)) {
3874  Found = true;
3875  }
3876  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3877  AArch64::FMULv2i64_indexed)) {
3879  Found = true;
3880  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3881  AArch64::FMULv2f64)) {
3883  Found = true;
3884  }
3885  break;
3886  case AArch64::FADDv4f32:
3887  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3888  AArch64::FMULv4i32_indexed)) {
3890  Found = true;
3891  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3892  AArch64::FMULv4f32)) {
3894  Found = true;
3895  }
3896  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3897  AArch64::FMULv4i32_indexed)) {
3899  Found = true;
3900  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3901  AArch64::FMULv4f32)) {
3903  Found = true;
3904  }
3905  break;
3906 
3907  case AArch64::FSUBSrr:
3908  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3910  Found = true;
3911  }
3912  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3914  Found = true;
3915  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3916  AArch64::FMULv1i32_indexed)) {
3918  Found = true;
3919  }
3920  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3922  Found = true;
3923  }
3924  break;
3925  case AArch64::FSUBDrr:
3926  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3928  Found = true;
3929  }
3930  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3932  Found = true;
3933  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3934  AArch64::FMULv1i64_indexed)) {
3936  Found = true;
3937  }
3938  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3940  Found = true;
3941  }
3942  break;
3943  case AArch64::FSUBv2f32:
3944  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3945  AArch64::FMULv2i32_indexed)) {
3947  Found = true;
3948  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3949  AArch64::FMULv2f32)) {
3951  Found = true;
3952  }
3953  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3954  AArch64::FMULv2i32_indexed)) {
3956  Found = true;
3957  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3958  AArch64::FMULv2f32)) {
3960  Found = true;
3961  }
3962  break;
3963  case AArch64::FSUBv2f64:
3964  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3965  AArch64::FMULv2i64_indexed)) {
3967  Found = true;
3968  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3969  AArch64::FMULv2f64)) {
3971  Found = true;
3972  }
3973  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3974  AArch64::FMULv2i64_indexed)) {
3976  Found = true;
3977  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3978  AArch64::FMULv2f64)) {
3980  Found = true;
3981  }
3982  break;
3983  case AArch64::FSUBv4f32:
3984  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3985  AArch64::FMULv4i32_indexed)) {
3987  Found = true;
3988  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3989  AArch64::FMULv4f32)) {
3991  Found = true;
3992  }
3993  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3994  AArch64::FMULv4i32_indexed)) {
3996  Found = true;
3997  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3998  AArch64::FMULv4f32)) {
4000  Found = true;
4001  }
4002  break;
4003  }
4004  return Found;
4005 }
4006 
4007 /// Return true when a code sequence can improve throughput. It
4008 /// should be called only for instructions in loops.
4009 /// \param Pattern - combiner pattern
4010 bool AArch64InstrInfo::isThroughputPattern(
4011  MachineCombinerPattern Pattern) const {
4012  switch (Pattern) {
4013  default:
4014  break;
4049  return true;
4050  } // end switch (Pattern)
4051  return false;
4052 }
4053 /// Return true when there is potentially a faster code sequence for an
4054 /// instruction chain ending in \p Root. All potential patterns are listed in
4055 /// the \p Pattern vector. Pattern should be sorted in priority order since the
4056 /// pattern evaluator stops checking as soon as it finds a faster sequence.
4057 
4058 bool AArch64InstrInfo::getMachineCombinerPatterns(
4059  MachineInstr &Root,
4060  SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
4061  // Integer patterns
4062  if (getMaddPatterns(Root, Patterns))
4063  return true;
4064  // Floating point patterns
4065  if (getFMAPatterns(Root, Patterns))
4066  return true;
4067 
4068  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
4069 }
4070 
4071 enum class FMAInstKind { Default, Indexed, Accumulator };
4072 /// genFusedMultiply - Generate fused multiply instructions.
4073 /// This function supports both integer and floating point instructions.
4074 /// A typical example:
4075 /// F|MUL I=A,B,0
4076 /// F|ADD R,I,C
4077 /// ==> F|MADD R,A,B,C
4078 /// \param MF Containing MachineFunction
4079 /// \param MRI Register information
4080 /// \param TII Target information
4081 /// \param Root is the F|ADD instruction
4082 /// \param [out] InsInstrs is a vector of machine instructions and will
4083 /// contain the generated madd instruction
4084 /// \param IdxMulOpd is index of operand in Root that is the result of
4085 /// the F|MUL. In the example above IdxMulOpd is 1.
4086 /// \param MaddOpc the opcode of the f|madd instruction
4087 /// \param RC Register class of operands
4088 /// \param kind of fma instruction (addressing mode) to be generated
4089 /// \param ReplacedAddend is the result register from the instruction
4090 /// replacing the non-combined operand, if any.
4091 static MachineInstr *
4092 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4093  const TargetInstrInfo *TII, MachineInstr &Root,
4094  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4095  unsigned MaddOpc, const TargetRegisterClass *RC,
4096  FMAInstKind kind = FMAInstKind::Default,
4097  const unsigned *ReplacedAddend = nullptr) {
4098  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4099 
4100  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4101  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4102  unsigned ResultReg = Root.getOperand(0).getReg();
4103  unsigned SrcReg0 = MUL->getOperand(1).getReg();
4104  bool Src0IsKill = MUL->getOperand(1).isKill();
4105  unsigned SrcReg1 = MUL->getOperand(2).getReg();
4106  bool Src1IsKill = MUL->getOperand(2).isKill();
4107 
4108  unsigned SrcReg2;
4109  bool Src2IsKill;
4110  if (ReplacedAddend) {
4111  // If we just generated a new addend, we must be its only use.
4112  SrcReg2 = *ReplacedAddend;
4113  Src2IsKill = true;
4114  } else {
4115  SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4116  Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4117  }
4118 
4120  MRI.constrainRegClass(ResultReg, RC);
4122  MRI.constrainRegClass(SrcReg0, RC);
4124  MRI.constrainRegClass(SrcReg1, RC);
4126  MRI.constrainRegClass(SrcReg2, RC);
4127 
4128  MachineInstrBuilder MIB;
4129  if (kind == FMAInstKind::Default)
4130  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4131  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4132  .addReg(SrcReg1, getKillRegState(Src1IsKill))
4133  .addReg(SrcReg2, getKillRegState(Src2IsKill));
4134  else if (kind == FMAInstKind::Indexed)
4135  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4136  .addReg(SrcReg2, getKillRegState(Src2IsKill))
4137  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4138  .addReg(SrcReg1, getKillRegState(Src1IsKill))
4139  .addImm(MUL->getOperand(3).getImm());
4140  else if (kind == FMAInstKind::Accumulator)
4141  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4142  .addReg(SrcReg2, getKillRegState(Src2IsKill))
4143  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4144  .addReg(SrcReg1, getKillRegState(Src1IsKill));
4145  else
4146  assert(false && "Invalid FMA instruction kind \n");
4147  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
4148  InsInstrs.push_back(MIB);
4149  return MUL;
4150 }
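// Minimal usage sketch (arguments assumed, mirroring the integer path in
// genAlternativeCodeSequence() below): for the register form,
//   MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, /*IdxMulOpd=*/1,
//                          AArch64::MADDWrrr, &AArch64::GPR32RegClass);
// appends a single "%res = MADDWrrr %a, %b, %c" to InsInstrs; the caller is
// still responsible for recording the old MUL and Root in DelInstrs.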
4151 
4152 /// genMaddR - Generate madd instruction and combine mul and add using
4153 /// an extra virtual register
4154 /// Example - an ADD intermediate needs to be stored in a register:
4155 /// MUL I=A,B,0
4156 /// ADD R,I,Imm
4157 /// ==> ORR V, ZR, Imm
4158 /// ==> MADD R,A,B,V
4159 /// \param MF Containing MachineFunction
4160 /// \param MRI Register information
4161 /// \param TII Target information
4162 /// \param Root is the ADD instruction
4163 /// \param [out] InsInstrs is a vector of machine instructions and will
4164 /// contain the generated madd instruction
4165 /// \param IdxMulOpd is index of operand in Root that is the result of
4166 /// the MUL. In the example above IdxMulOpd is 1.
4167 /// \param MaddOpc the opcode of the madd instruction
4168 /// \param VR is a virtual register that holds the value of an ADD operand
4169 /// (V in the example above).
4170 /// \param RC Register class of operands
4171 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4172  const TargetInstrInfo *TII, MachineInstr &Root,
4173  SmallVectorImpl<MachineInstr *> &InsInstrs,
4174  unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4175  const TargetRegisterClass *RC) {
4176  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4177 
4178  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4179  unsigned ResultReg = Root.getOperand(0).getReg();
4180  unsigned SrcReg0 = MUL->getOperand(1).getReg();
4181  bool Src0IsKill = MUL->getOperand(1).isKill();
4182  unsigned SrcReg1 = MUL->getOperand(2).getReg();
4183  bool Src1IsKill = MUL->getOperand(2).isKill();
4184 
4186  MRI.constrainRegClass(ResultReg, RC);
4188  MRI.constrainRegClass(SrcReg0, RC);
4190  MRI.constrainRegClass(SrcReg1, RC);
4192  MRI.constrainRegClass(VR, RC);
4193 
4194  MachineInstrBuilder MIB =
4195  BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4196  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4197  .addReg(SrcReg1, getKillRegState(Src1IsKill))
4198  .addReg(VR);
4199  // Insert the MADD
4200  InsInstrs.push_back(MIB);
4201  return MUL;
4202 }
4203 
4204 /// When getMachineCombinerPatterns() finds potential patterns,
4205 /// this function generates the instructions that could replace the
4206 /// original code sequence
4207 void AArch64InstrInfo::genAlternativeCodeSequence(
4208  MachineInstr &Root, MachineCombinerPattern Pattern,
4209  SmallVectorImpl<MachineInstr *> &InsInstrs,
4210  SmallVectorImpl<MachineInstr *> &DelInstrs,
4211  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4212  MachineBasicBlock &MBB = *Root.getParent();
4213  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4214  MachineFunction &MF = *MBB.getParent();
4215  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4216 
4217  MachineInstr *MUL;
4218  const TargetRegisterClass *RC;
4219  unsigned Opc;
4220  switch (Pattern) {
4221  default:
4222  // Reassociate instructions.
4223  TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4224  DelInstrs, InstrIdxForVirtReg);
4225  return;
4228  // MUL I=A,B,0
4229  // ADD R,I,C
4230  // ==> MADD R,A,B,C
4231  // --- Create(MADD);
4232  if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4233  Opc = AArch64::MADDWrrr;
4234  RC = &AArch64::GPR32RegClass;
4235  } else {
4236  Opc = AArch64::MADDXrrr;
4237  RC = &AArch64::GPR64RegClass;
4238  }
4239  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4240  break;
4243  // MUL I=A,B,0
4244  // ADD R,C,I
4245  // ==> MADD R,A,B,C
4246  // --- Create(MADD);
4247  if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4248  Opc = AArch64::MADDWrrr;
4249  RC = &AArch64::GPR32RegClass;
4250  } else {
4251  Opc = AArch64::MADDXrrr;
4252  RC = &AArch64::GPR64RegClass;
4253  }
4254  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4255  break;
4258  // MUL I=A,B,0
4259  // ADD R,I,Imm
4260  // ==> ORR V, ZR, Imm
4261  // ==> MADD R,A,B,V
4262  // --- Create(MADD);
4263  const TargetRegisterClass *OrrRC;
4264  unsigned BitSize, OrrOpc, ZeroReg;
4265  if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4266  OrrOpc = AArch64::ORRWri;
4267  OrrRC = &AArch64::GPR32spRegClass;
4268  BitSize = 32;
4269  ZeroReg = AArch64::WZR;
4270  Opc = AArch64::MADDWrrr;
4271  RC = &AArch64::GPR32RegClass;
4272  } else {
4273  OrrOpc = AArch64::ORRXri;
4274  OrrRC = &AArch64::GPR64spRegClass;
4275  BitSize = 64;
4276  ZeroReg = AArch64::XZR;
4277  Opc = AArch64::MADDXrrr;
4278  RC = &AArch64::GPR64RegClass;
4279  }
4280  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4281  uint64_t Imm = Root.getOperand(2).getImm();
4282 
4283  if (Root.getOperand(3).isImm()) {
4284  unsigned Val = Root.getOperand(3).getImm();
4285  Imm = Imm << Val;
4286  }
4287  uint64_t UImm = SignExtend64(Imm, BitSize);
4288  uint64_t Encoding;
4289  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4290  MachineInstrBuilder MIB1 =
4291  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4292  .addReg(ZeroReg)
4293  .addImm(Encoding);
4294  InsInstrs.push_back(MIB1);
4295  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4296  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4297  }
4298  break;
4299  }
4302  // MUL I=A,B,0
4303  // SUB R,I, C
4304  // ==> SUB V, 0, C
4305  // ==> MADD R,A,B,V // = -C + A*B
4306  // --- Create(MADD);
4307  const TargetRegisterClass *SubRC;
4308  unsigned SubOpc, ZeroReg;
4309  if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4310  SubOpc = AArch64::SUBWrr;
4311  SubRC = &AArch64::GPR32spRegClass;
4312  ZeroReg = AArch64::WZR;
4313  Opc = AArch64::MADDWrrr;
4314  RC = &AArch64::GPR32RegClass;
4315  } else {
4316  SubOpc = AArch64::SUBXrr;
4317  SubRC = &AArch64::GPR64spRegClass;
4318  ZeroReg = AArch64::XZR;
4319  Opc = AArch64::MADDXrrr;
4320  RC = &AArch64::GPR64RegClass;
4321  }
4322  unsigned NewVR = MRI.createVirtualRegister(SubRC);
4323  // SUB NewVR, 0, C
4324  MachineInstrBuilder MIB1 =
4325  BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4326  .addReg(ZeroReg)
4327  .add(Root.getOperand(2));
4328  InsInstrs.push_back(MIB1);
4329  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4330  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4331  break;
4332  }
4335  // MUL I=A,B,0
4336  // SUB R,C,I
4337  // ==> MSUB R,A,B,C (computes C - A*B)
4338  // --- Create(MSUB);
4339  if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4340  Opc = AArch64::MSUBWrrr;
4341  RC = &AArch64::GPR32RegClass;
4342  } else {
4343  Opc = AArch64::MSUBXrrr;
4344  RC = &AArch64::GPR64RegClass;
4345  }
4346  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4347  break;
4350  // MUL I=A,B,0
4351  // SUB R,I, Imm
4352  // ==> ORR V, ZR, -Imm
4353  // ==> MADD R,A,B,V // = -Imm + A*B
4354  // --- Create(MADD);
4355  const TargetRegisterClass *OrrRC;
4356  unsigned BitSize, OrrOpc, ZeroReg;
4357  if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4358  OrrOpc = AArch64::ORRWri;
4359  OrrRC = &AArch64::GPR32spRegClass;
4360  BitSize = 32;
4361  ZeroReg = AArch64::WZR;
4362  Opc = AArch64::MADDWrrr;
4363  RC = &AArch64::GPR32RegClass;
4364  } else {
4365  OrrOpc = AArch64::ORRXri;
4366  OrrRC = &AArch64::GPR64spRegClass;
4367  BitSize = 64;
4368  ZeroReg = AArch64::XZR;
4369  Opc = AArch64::MADDXrrr;
4370  RC = &AArch64::GPR64RegClass;
4371  }
4372  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4373  uint64_t Imm = Root.getOperand(2).getImm();
4374  if (Root.getOperand(3).isImm()) {
4375  unsigned Val = Root.getOperand(3).getImm();
4376  Imm = Imm << Val;
4377  }
4378  uint64_t UImm = SignExtend64(-Imm, BitSize);
4379  uint64_t Encoding;
4380  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4381  MachineInstrBuilder MIB1 =
4382  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4383  .addReg(ZeroReg)
4384  .addImm(Encoding);
4385  InsInstrs.push_back(MIB1);
4386  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4387  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4388  }
4389  break;
4390  }
4391  // Floating Point Support
4394  // MUL I=A,B,0
4395  // ADD R,I,C
4396  // ==> MADD R,A,B,C
4397  // --- Create(MADD);
4398  if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4399  Opc = AArch64::FMADDSrrr;
4400  RC = &AArch64::FPR32RegClass;
4401  } else {
4402  Opc = AArch64::FMADDDrrr;
4403  RC = &AArch64::FPR64RegClass;
4404  }
4405  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4406  break;
4409  // FMUL I=A,B,0
4410  // FADD R,C,I
4411  // ==> FMADD R,A,B,C
4412  // --- Create(FMADD);
4413  if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4414  Opc = AArch64::FMADDSrrr;
4415  RC = &AArch64::FPR32RegClass;
4416  } else {
4417  Opc = AArch64::FMADDDrrr;
4418  RC = &AArch64::FPR64RegClass;
4419  }
4420  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4421  break;
4422 
4424  Opc = AArch64::FMLAv1i32_indexed;
4425  RC = &AArch64::FPR32RegClass;
4426  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4428  break;
4430  Opc = AArch64::FMLAv1i32_indexed;
4431  RC = &AArch64::FPR32RegClass;
4432  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4434  break;
4435 
4437  Opc = AArch64::FMLAv1i64_indexed;
4438  RC = &AArch64::FPR64RegClass;
4439  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4441  break;
4443  Opc = AArch64::FMLAv1i64_indexed;
4444  RC = &AArch64::FPR64RegClass;
4445  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4447  break;
4448 
4451  RC = &AArch64::FPR64RegClass;
4453  Opc = AArch64::FMLAv2i32_indexed;
4454  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4456  } else {
4457  Opc = AArch64::FMLAv2f32;
4458  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4460  }
4461  break;
4464  RC = &AArch64::FPR64RegClass;
4466  Opc = AArch64::FMLAv2i32_indexed;
4467  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4469  } else {
4470  Opc = AArch64::FMLAv2f32;
4471  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4473  }
4474  break;
4475 
4478  RC = &AArch64::FPR128RegClass;
4480  Opc = AArch64::FMLAv2i64_indexed;
4481  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4483  } else {
4484  Opc = AArch64::FMLAv2f64;
4485  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4487  }
4488  break;
4491  RC = &AArch64::FPR128RegClass;
4493  Opc = AArch64::FMLAv2i64_indexed;
4494  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4496  } else {
4497  Opc = AArch64::FMLAv2f64;
4498  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4500  }
4501  break;
4502 
4505  RC = &AArch64::FPR128RegClass;
4507  Opc = AArch64::FMLAv4i32_indexed;
4508  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4510  } else {
4511  Opc = AArch64::FMLAv4f32;
4512  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4514  }
4515  break;
4516 
4519  RC = &AArch64::FPR128RegClass;
4521  Opc = AArch64::FMLAv4i32_indexed;
4522  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4524  } else {
4525  Opc = AArch64::FMLAv4f32;
4526  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4528  }
4529  break;
4530 
4533  // FMUL I=A,B,0
4534  // FSUB R,I,C
4535  // ==> FNMSUB R,A,B,C // = -C + A*B
4536  // --- Create(FNMSUB);
4537  if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4538  Opc = AArch64::FNMSUBSrrr;
4539  RC = &AArch64::FPR32RegClass;
4540  } else {
4541  Opc = AArch64::FNMSUBDrrr;
4542  RC = &AArch64::FPR64RegClass;
4543  }
4544  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4545  break;
4546  }
4547 
4550  // FNMUL I=A,B,0
4551  // FSUB R,I,C
4552  // ==> FNMADD R,A,B,C // = -A*B - C
4553  // --- Create(FNMADD);
4554  if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4555  Opc = AArch64::FNMADDSrrr;
4556  RC = &AArch64::FPR32RegClass;
4557  } else {
4558  Opc = AArch64::FNMADDDrrr;
4559  RC = &AArch64::FPR64RegClass;
4560  }
4561  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4562  break;
4563  }
4564 
4567  // FMUL I=A,B,0
4568  // FSUB R,C,I
4569  // ==> FMSUB R,A,B,C (computes C - A*B)
4570  // --- Create(FMSUB);
4571  if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4572  Opc = AArch64::FMSUBSrrr;
4573  RC = &AArch64::FPR32RegClass;
4574  } else {
4575  Opc = AArch64::FMSUBDrrr;
4576  RC = &AArch64::FPR64RegClass;
4577  }
4578  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4579  break;
4580  }
4581 
4583  Opc = AArch64::FMLSv1i32_indexed;
4584  RC = &AArch64::FPR32RegClass;
4585  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4587  break;
4588 
4590  Opc = AArch64::FMLSv1i64_indexed;
4591  RC = &AArch64::FPR64RegClass;
4592  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4594  break;
4595 
4598  RC = &AArch64::FPR64RegClass;
4600  Opc = AArch64::FMLSv2i32_indexed;
4601  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4603  } else {
4604  Opc = AArch64::FMLSv2f32;
4605  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4607  }
4608  break;
4609 
4612  RC = &AArch64::FPR128RegClass;
4614  Opc = AArch64::FMLSv2i64_indexed;
4615  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4617  } else {
4618  Opc = AArch64::FMLSv2f64;
4619  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4621  }
4622  break;
4623 
4626  RC = &AArch64::FPR128RegClass;
4628  Opc = AArch64::FMLSv4i32_indexed;
4629  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4631  } else {
4632  Opc = AArch64::FMLSv4f32;
4633  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4635  }
4636  break;
4639  RC = &AArch64::FPR64RegClass;
4640  unsigned NewVR = MRI.createVirtualRegister(RC);
4641  MachineInstrBuilder MIB1 =
4642  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4643  .add(Root.getOperand(2));
4644  InsInstrs.push_back(MIB1);
4645  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4647  Opc = AArch64::FMLAv2i32_indexed;
4648  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4649  FMAInstKind::Indexed, &NewVR);
4650  } else {
4651  Opc = AArch64::FMLAv2f32;
4652  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4653  FMAInstKind::Accumulator, &NewVR);
4654  }
4655  break;
4656  }
4659  RC = &AArch64::FPR128RegClass;
4660  unsigned NewVR = MRI.createVirtualRegister(RC);
4661  MachineInstrBuilder MIB1 =
4662  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4663  .add(Root.getOperand(2));
4664  InsInstrs.push_back(MIB1);
4665  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4667  Opc = AArch64::FMLAv4i32_indexed;
4668  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4669  FMAInstKind::Indexed, &NewVR);
4670  } else {
4671  Opc = AArch64::FMLAv4f32;
4672  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4673  FMAInstKind::Accumulator, &NewVR);
4674  }
4675  break;
4676  }
4679  RC = &AArch64::FPR128RegClass;
4680  unsigned NewVR = MRI.createVirtualRegister(RC);
4681  MachineInstrBuilder MIB1 =
4682  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4683  .add(Root.getOperand(2));
4684  InsInstrs.push_back(MIB1);
4685  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4687  Opc = AArch64::FMLAv2i64_indexed;
4688  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4689  FMAInstKind::Indexed, &NewVR);
4690  } else {
4691  Opc = AArch64::FMLAv2f64;
4692  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4693  FMAInstKind::Accumulator, &NewVR);
4694  }
4695  break;
4696  }
4697  } // end switch (Pattern)
4698  // Record MUL and ADD/SUB for deletion
4699  DelInstrs.push_back(MUL);
4700  DelInstrs.push_back(&Root);
4701 }
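// End-to-end sketch of the register-form rewrite above (register names are
// illustrative only):
//   mul  w8, w0, w1
//   add  w0, w8, w2        ==>   madd w0, w0, w1, w2
// For the immediate forms, the addend is first materialized with a
// logical-immediate ORR (ORRWri/ORRXri) so that a plain MADD can be used.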
4702 
4703 /// Replace csincr-branch sequence by simple conditional branch
4704 ///
4705 /// Examples:
4706 /// 1. \code
4707 /// csinc w9, wzr, wzr, <condition code>
4708 /// tbnz w9, #0, 0x44
4709 /// \endcode
4710 /// to
4711 /// \code
4712 /// b.<inverted condition code>
4713 /// \endcode
4714 ///
4715 /// 2. \code
4716 /// csinc w9, wzr, wzr, <condition code>
4717 /// tbz w9, #0, 0x44
4718 /// \endcode
4719 /// to
4720 /// \code
4721 /// b.<condition code>
4722 /// \endcode
4723 ///
4724 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4725 /// compare's constant operand is power of 2.
4726 ///
4727 /// Examples:
4728 /// \code
4729 /// and w8, w8, #0x400
4730 /// cbnz w8, L1
4731 /// \endcode
4732 /// to
4733 /// \code
4734 /// tbnz w8, #10, L1
4735 /// \endcode
4736 ///
4737 /// \param MI Conditional Branch
4738 /// \return True when the simple conditional branch is generated
4739 ///
4740 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4741  bool IsNegativeBranch = false;
4742  bool IsTestAndBranch = false;
4743  unsigned TargetBBInMI = 0;
4744  switch (MI.getOpcode()) {
4745  default:
4746  llvm_unreachable("Unknown branch instruction?");
4747  case AArch64::Bcc:
4748  return false;
4749  case AArch64::CBZW:
4750  case AArch64::CBZX:
4751  TargetBBInMI = 1;
4752  break;
4753  case AArch64::CBNZW:
4754  case AArch64::CBNZX:
4755  TargetBBInMI = 1;
4756  IsNegativeBranch = true;
4757  break;
4758  case AArch64::TBZW:
4759  case AArch64::TBZX:
4760  TargetBBInMI = 2;
4761  IsTestAndBranch = true;
4762  break;
4763  case AArch64::TBNZW:
4764  case AArch64::TBNZX:
4765  TargetBBInMI = 2;
4766  IsNegativeBranch = true;
4767  IsTestAndBranch = true;
4768  break;
4769  }
4770  // So we increment a zero register and test for bits other
4771  // than bit 0? Conservatively bail out in case the verifier
4772  // missed this case.
4773  if (IsTestAndBranch && MI.getOperand(1).getImm())
4774  return false;
4775 
4776  // Find Definition.
4777  assert(MI.getParent() && "Incomplete machine instruction\n");
4778  MachineBasicBlock *MBB = MI.getParent();
4779  MachineFunction *MF = MBB->getParent();
4780  MachineRegisterInfo *MRI = &MF->getRegInfo();
4781  unsigned VReg = MI.getOperand(0).getReg();
4782  if (!TargetRegisterInfo::isVirtualRegister(VReg))
4783  return false;
4784 
4785  MachineInstr *DefMI = MRI->getVRegDef(VReg);
4786 
4787  // Look through COPY instructions to find definition.
4788  while (DefMI->isCopy()) {
4789  unsigned CopyVReg = DefMI->getOperand(1).getReg();
4790  if (!MRI->hasOneNonDBGUse(CopyVReg))
4791  return false;
4792  if (!MRI->hasOneDef(CopyVReg))
4793  return false;
4794  DefMI = MRI->getVRegDef(CopyVReg);
4795  }
4796 
4797  switch (DefMI->getOpcode()) {
4798  default:
4799  return false;
4800  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4801  case AArch64::ANDWri:
4802  case AArch64::ANDXri: {
4803  if (IsTestAndBranch)
4804  return false;
4805  if (DefMI->getParent() != MBB)
4806  return false;
4807  if (!MRI->hasOneNonDBGUse(VReg))
4808  return false;
4809 
4810  bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4811  uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4812  DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4813  if (!isPowerOf2_64(Mask))
4814  return false;
4815 
4816  MachineOperand &MO = DefMI->getOperand(1);
4817  unsigned NewReg = MO.getReg();
4818  if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4819  return false;
4820 
4821  assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4822 
4823  MachineBasicBlock &RefToMBB = *MBB;
4824  MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4825  DebugLoc DL = MI.getDebugLoc();
4826  unsigned Imm = Log2_64(Mask);
4827  unsigned Opc = (Imm < 32)
4828  ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4829  : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4830  MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4831  .addReg(NewReg)
4832  .addImm(Imm)
4833  .addMBB(TBB);
4834  // Register lives on to the CBZ now.
4835  MO.setIsKill(false);
4836 
4837  // For bit positions smaller than 32, we need to use the 32-bit
4838  // variant (W) in all cases, because the 64-bit variant cannot
4839  // encode them.
4840  // Therefore, if the input register is 64-bit, we need to take the
4841  // 32-bit sub-part.
4842  if (!Is32Bit && Imm < 32)
4843  NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4844  MI.eraseFromParent();
4845  return true;
4846  }
4847  // Look for CSINC
4848  case AArch64::CSINCWr:
4849  case AArch64::CSINCXr: {
4850  if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4851  DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4852  !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4853  DefMI->getOperand(2).getReg() == AArch64::XZR))
4854  return false;
4855 
4856  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4857  return false;
4858 
4859  AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4860  // Convert only when the condition code is not modified between
4861  // the CSINC and the branch. The CC may be used by other
4862  // instructions in between.
4863  if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4864  return false;
4865  MachineBasicBlock &RefToMBB = *MBB;
4866  MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4867  DebugLoc DL = MI.getDebugLoc();
4868  if (IsNegativeBranch)
4869  CC = AArch64CC::getInvertedCondCode(CC);
4870  BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4871  MI.eraseFromParent();
4872  return true;
4873  }
4874  }
4875 }
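// Worked example of the CSINC fold above (labels illustrative):
//   csinc w9, wzr, wzr, eq    ; w9 = (eq) ? 0 : 1, i.e. cset w9, ne
//   tbnz  w9, #0, .Ltarget    ; taken exactly when the original flags were ne
// becomes, after inverting the condition for the TBNZ case,
//   b.ne  .Ltarget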
4876 
4877 std::pair<unsigned, unsigned>
4878 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4879  const unsigned Mask = AArch64II::MO_FRAGMENT;
4880  return std::make_pair(TF & Mask, TF & ~Mask);
4881 }
4882 
4883 ArrayRef<std::pair<unsigned, const char *>>
4884 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4885  using namespace AArch64II;
4886 
4887  static const std::pair<unsigned, const char *> TargetFlags[] = {
4888  {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4889  {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4890  {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
4891  {MO_HI12, "aarch64-hi12"}};
4892  return makeArrayRef(TargetFlags);
4893 }
4894 
4895 ArrayRef<std::pair<unsigned, const char *>>
4896 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4897  using namespace AArch64II;
4898 
4899  static const std::pair<unsigned, const char *> TargetFlags[] = {
4900  {MO_COFFSTUB, "aarch64-coffstub"},
4901  {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
4902  {MO_TLS, "aarch64-tls"}, {MO_DLLIMPORT, "aarch64-dllimport"}};
4903  return makeArrayRef(TargetFlags);
4904 }
4905 
4906 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4907 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4908  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4909  {{MOSuppressPair, "aarch64-suppress-pair"},
4910  {MOStridedAccess, "aarch64-strided-access"}};
4911  return makeArrayRef(TargetFlags);
4912 }
4913 
4914 /// Constants defining how certain sequences should be outlined.
4915 /// This encompasses how an outlined function should be called, and what kind of
4916 /// frame should be emitted for that outlined function.
4917 ///
4918 /// \p MachineOutlinerDefault implies that the function should be called with
4919 /// a save and restore of LR to the stack.
4920 ///
4921 /// That is,
4922 ///
4923 /// I1 Save LR OUTLINED_FUNCTION:
4924 /// I2 --> BL OUTLINED_FUNCTION I1
4925 /// I3 Restore LR I2
4926 /// I3
4927 /// RET
4928 ///
4929 /// * Call construction overhead: 3 (save + BL + restore)
4930 /// * Frame construction overhead: 1 (ret)
4931 /// * Requires stack fixups? Yes
4932 ///
4933 /// \p MachineOutlinerTailCall implies that the function is being created from
4934 /// a sequence of instructions ending in a return.
4935 ///
4936 /// That is,
4937 ///
4938 /// I1 OUTLINED_FUNCTION:
4939 /// I2 --> B OUTLINED_FUNCTION I1
4940 /// RET I2
4941 /// RET
4942 ///
4943 /// * Call construction overhead: 1 (B)
4944 /// * Frame construction overhead: 0 (Return included in sequence)
4945 /// * Requires stack fixups? No
4946 ///
4947 /// \p MachineOutlinerNoLRSave implies that the function should be called using
4948 /// a BL instruction, but doesn't require LR to be saved and restored. This
4949 /// happens when LR is known to be dead.
4950 ///
4951 /// That is,
4952 ///
4953 /// I1 OUTLINED_FUNCTION:
4954 /// I2 --> BL OUTLINED_FUNCTION I1
4955 /// I3 I2
4956 /// I3
4957 /// RET
4958 ///
4959 /// * Call construction overhead: 1 (BL)
4960 /// * Frame construction overhead: 1 (RET)
4961 /// * Requires stack fixups? No
4962 ///
4963 /// \p MachineOutlinerThunk implies that the function is being created from
4964 /// a sequence of instructions ending in a call. The outlined function is
4965 /// called with a BL instruction, and the outlined function tail-calls the
4966 /// original call destination.
4967 ///
4968 /// That is,
4969 ///
4970 /// I1 OUTLINED_FUNCTION:
4971 /// I2 --> BL OUTLINED_FUNCTION I1
4972 /// BL f I2
4973 /// B f
4974 /// * Call construction overhead: 1 (BL)
4975 /// * Frame construction overhead: 0
4976 /// * Requires stack fixups? No
4977 ///
4978 /// \p MachineOutlinerRegSave implies that the function should be called with a
4979 /// save and restore of LR to an available register. This allows us to avoid
4980 /// stack fixups. Note that this outlining variant is compatible with the
4981 /// NoLRSave case.
4982 ///
4983 /// That is,
4984 ///
4985 /// I1 Save LR OUTLINED_FUNCTION:
4986 /// I2 --> BL OUTLINED_FUNCTION I1
4987 /// I3 Restore LR I2
4988 /// I3
4989 /// RET
4990 ///
4991 /// * Call construction overhead: 3 (save + BL + restore)
4992 /// * Frame construction overhead: 1 (ret)
4993 /// * Requires stack fixups? No
4995  MachineOutlinerDefault, /// Emit a save, restore, call, and return.
4996  MachineOutlinerTailCall, /// Only emit a branch.
4997  MachineOutlinerNoLRSave, /// Emit a call and return.
4998  MachineOutlinerThunk, /// Emit a call and tail-call.
4999  MachineOutlinerRegSave /// Same as default, but save to a register.
5000 };
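// Illustrative call sequences for two of the variants above (x20 stands in
// for whatever register findRegisterToSaveLRTo() picks):
//   MachineOutlinerRegSave:              MachineOutlinerDefault:
//     mov  x20, lr   (ORRXrs)              str  lr, [sp, #-16]!  (STRXpre)
//     bl   OUTLINED_FUNCTION               bl   OUTLINED_FUNCTION
//     mov  lr, x20                         ldr  lr, [sp], #16    (LDRXpost)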
5001 
5002 enum MachineOutlinerMBBFlags {
5003  LRUnavailableSomewhere = 0x2,
5004  HasCalls = 0x4
5005 };
5006 
5007 unsigned
5008 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
5009  MachineFunction *MF = C.getMF();
5010  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5011  MF->getSubtarget().getRegisterInfo());
5012 
5013  // Check if there is an available register across the sequence that we can
5014  // use.
5015  for (unsigned Reg : AArch64::GPR64RegClass) {
5016  if (!ARI->isReservedReg(*MF, Reg) &&
5017  Reg != AArch64::LR && // LR is not reserved, but don't use it.
5018  Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
5019  Reg != AArch64::X17 && // Ditto for X17.
5021  return Reg;
5022  }
5023 
5024  // No suitable register. Return 0.
5025  return 0u;
5026 }
5027 
5028 outliner::OutlinedFunction
5029 AArch64InstrInfo::getOutliningCandidateInfo(
5030  std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
5031  unsigned SequenceSize = std::accumulate(
5032  RepeatedSequenceLocs[0].front(),
5033  std::next(RepeatedSequenceLocs[0].back()),
5034  0, [this](unsigned Sum, const MachineInstr &MI) {
5035  return Sum + getInstSizeInBytes(MI);
5036  });
5037 
5038  // Compute liveness information for each candidate.
5039  const TargetRegisterInfo &TRI = getRegisterInfo();
5040  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5041  [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });
5042 
5043  // According to the AArch64 Procedure Call Standard, the following are
5044  // undefined on entry/exit from a function call:
5045  //
5046  // * Registers x16, x17, (and thus w16, w17)
5047  // * Condition codes (and thus the NZCV register)
5048  //
5049  // Because of this, we can't outline any sequence of instructions where
5050  // one of these registers is live into/across it. Thus, we need to
5051  // delete those candidates.
5054  auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
5055  LiveRegUnits LRU = C.LRU;
5056  return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
5057  !LRU.available(AArch64::NZCV));
5058  };
5059 
5060  // Erase every candidate that violates the restrictions above. (It could be
5061  // true that we have viable candidates, so it's not worth bailing out in
5062  // the case that, say, 1 out of 20 candidates violates the restrictions.)
5063  RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5064  RepeatedSequenceLocs.end(),
5065  CantGuaranteeValueAcrossCall),
5066  RepeatedSequenceLocs.end());
5067 
5068  // If the sequence is empty, we're done.
5069  if (RepeatedSequenceLocs.empty())
5070  return outliner::OutlinedFunction();
5071 
5072  // At this point, we have only "safe" candidates to outline. Figure out
5073  // frame + call instruction information.
5074 
5075  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
5076 
5077  // Helper lambda which sets call information for every candidate.
5078  auto SetCandidateCallInfo =
5079  [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5080  for (outliner::Candidate &C : RepeatedSequenceLocs)
5081  C.setCallInfo(CallID, NumBytesForCall);
5082  };
5083 
5084  unsigned FrameID = MachineOutlinerDefault;
5085  unsigned NumBytesToCreateFrame = 4;
5086 
5087  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
5088  return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
5089  });
5090 
5091  // If the last instruction in any candidate is a terminator, then we should
5092  // tail call all of the candidates.
5093  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5094  FrameID = MachineOutlinerTailCall;
5095  NumBytesToCreateFrame = 0;
5096  SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5097  }
5098 
5099  else if (LastInstrOpcode == AArch64::BL ||
5100  (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
5101  // FIXME: Do we need to check if the code after this uses the value of LR?
5102  FrameID = MachineOutlinerThunk;
5103  NumBytesToCreateFrame = 0;
5104  SetCandidateCallInfo(MachineOutlinerThunk, 4);
5105  }
5106 
5107  // Make sure that LR isn't live on entry to this candidate. The only
5108  // instructions that use LR that could possibly appear in a repeated sequence
5109  // are calls. Therefore, we only have to check and see if LR is dead on entry
5110  // to (or exit from) some candidate.
5111  else if (std::all_of(RepeatedSequenceLocs.begin(),
5112  RepeatedSequenceLocs.end(),
5113  [](outliner::Candidate &C) {
5114  return C.LRU.available(AArch64::LR);
5115  })) {
5116  FrameID = MachineOutlinerNoLRSave;
5117  NumBytesToCreateFrame = 4;
5118  SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
5119  }
5120 
5121  // LR is live, so we need to save it. Decide whether it should be saved to
5122  // the stack, or if it can be saved to a register.
5123  else {
5124  if (all_of(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
5125  return findRegisterToSaveLRTo(C);
5126  })) {
5127  // Every candidate has an available callee-saved register for the save.
5128  // We can save LR to a register.
5129  FrameID = MachineOutlinerRegSave;
5130  NumBytesToCreateFrame = 4;
5131  SetCandidateCallInfo(MachineOutlinerRegSave, 12);
5132  }
5133 
5134  else {
5135  // At least one candidate does not have an available callee-saved
5136  // register. We must save LR to the stack.
5137  FrameID = MachineOutlinerDefault;
5138  NumBytesToCreateFrame = 4;
5139  SetCandidateCallInfo(MachineOutlinerDefault, 12);
5140  }
5141  }
5142 
5143  // Check if the range contains a call. These require a save + restore of the
5144  // link register.
5145  if (std::any_of(RepeatedSequenceLocs[0].front(),
5146  RepeatedSequenceLocs[0].back(),
5147  [](const MachineInstr &MI) { return MI.isCall(); }))
5148  NumBytesToCreateFrame += 8; // Save + restore the link register.
5149 
5150  // Handle the last instruction separately. If this is a tail call, then the
5151  // last instruction is a call. We don't want to save + restore in this case.
5152  // However, it could be possible that the last instruction is a call without
5153  // it being valid to tail call this sequence. We should consider this as well.
5154  else if (FrameID != MachineOutlinerThunk &&
5155  FrameID != MachineOutlinerTailCall &&
5156  RepeatedSequenceLocs[0].back()->isCall())
5157  NumBytesToCreateFrame += 8;
5158 
5159  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5160  NumBytesToCreateFrame, FrameID);
5161 }
5162 
5163 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5164  MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5165  const Function &F = MF.getFunction();
5166 
5167  // Can F be deduplicated by the linker? If it can, don't outline from it.
5168  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5169  return false;
5170 
5171  // Don't outline from functions with section markings; the program could
5172  // expect that all the code is in the named section.
5173  // FIXME: Allow outlining from multiple functions with the same section
5174  // marking.
5175  if (F.hasSection())
5176  return false;
5177 
5178  // Outlining from functions with redzones is unsafe since the outliner may
5179  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5180  // outline from it.
5181  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5182  if (!AFI || AFI->hasRedZone().getValueOr(true))
5183  return false;
5184 
5185  // It's safe to outline from MF.
5186  return true;
5187 }
5188 
5189 unsigned
5190 AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
5191  unsigned Flags = 0x0;
5192  // Check if there's a call inside this MachineBasicBlock. If there is, then
5193  // set a flag.
5194  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
5195  Flags |= MachineOutlinerMBBFlags::HasCalls;
5196 
5197  // Check if LR is available through all of the MBB. If it's not, then set
5198  // a flag.
5199  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
5200  "Suitable Machine Function for outlining must track liveness");
5201  LiveRegUnits LRU(getRegisterInfo());
5202  LRU.addLiveOuts(MBB);
5203 
5204  std::for_each(MBB.rbegin(),
5205  MBB.rend(),
5206  [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
5207 
5208  if (!LRU.available(AArch64::LR))
5209  Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
5210 
5211  return Flags;
5212 }
5213 
5214 outliner::InstrType
5215 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
5216  unsigned Flags) const {
5217  MachineInstr &MI = *MIT;
5218  MachineBasicBlock *MBB = MI.getParent();
5219  MachineFunction *MF = MBB->getParent();
5220  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
5221 
5222  // Don't outline LOHs.
5223  if (FuncInfo->getLOHRelated().count(&MI))
5225 
5226  // Don't allow debug values to impact outlining type.
5227  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
5229 
5230  // At this point, KILL instructions don't really tell us much so we can go
5231  // ahead and skip over them.
5232  if (MI.isKill())
5234 
5235  // Is this a terminator for a basic block?
5236  if (MI.isTerminator()) {
5237 
5238  // Is this the end of a function?
5239  if (MI.getParent()->succ_empty())
5241 
5242  // It's not, so don't outline it.
5244  }
5245 
5246  // Make sure none of the operands are un-outlinable.
5247  for (const MachineOperand &MOP : MI.operands()) {
5248  if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
5249  MOP.isTargetIndex())
5251 
5252  // If it uses LR or W30 explicitly, then don't touch it.
5253  if (MOP.isReg() && !MOP.isImplicit() &&
5254  (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
5256  }
5257 
5258  // Special cases for instructions that can always be outlined, but will fail
5259  // the later tests, e.g. ADRPs, which are PC-relative, use LR, but can always
5260  // be outlined because they don't require a *specific* value to be in LR.
5261  if (MI.getOpcode() == AArch64::ADRP)
5263 
5264  // If MI is a call we might be able to outline it. We don't want to outline
5265  // any calls that rely on the position of items on the stack. When we outline
5266  // something containing a call, we have to emit a save and restore of LR in
5267  // the outlined function. Currently, this always happens by saving LR to the
5268  // stack. Thus, if we outline, say, half the parameters for a function call
5269  // plus the call, then we'll break the callee's expectations for the layout
5270  // of the stack.
5271  //
5272  // FIXME: Allow calls to functions which construct a stack frame, as long
5273  // as they don't access arguments on the stack.
5274  // FIXME: Figure out some way to analyze functions defined in other modules.
5275  // We should be able to compute the memory usage based on the IR calling
5276  // convention, even if we can't see the definition.
5277  if (MI.isCall()) {
5278  // Get the function associated with the call. Look at each operand and find
5279  // the one that represents the callee and get its name.
5280  const Function *Callee = nullptr;
5281  for (const MachineOperand &MOP : MI.operands()) {
5282  if (MOP.isGlobal()) {
5283  Callee = dyn_cast<Function>(MOP.getGlobal());
5284  break;
5285  }
5286  }
5287 
5288  // Never outline calls to mcount. There isn't any rule that would require
5289  // this, but the Linux kernel's "ftrace" feature depends on it.
5290  if (Callee && Callee->getName() == "\01_mcount")
5292 
5293  // If we don't know anything about the callee, assume it depends on the
5294  // stack layout of the caller. In that case, it's only legal to outline
5295  // as a tail-call. Whitelist the call instructions we know about so we
5296  // don't get unexpected results with call pseudo-instructions.
5297  auto UnknownCallOutlineType = outliner::InstrType::Illegal;
5298  if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
5299  UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
5300 
5301  if (!Callee)
5302  return UnknownCallOutlineType;
5303 
5304  // We have a function we have information about. Check if it's something
5305  // we can safely outline.
5306  MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
5307 
5308  // We don't know what's going on with the callee at all. Don't touch it.
5309  if (!CalleeMF)
5310  return UnknownCallOutlineType;
5311 
5312  // Check if we know anything about the callee saves on the function. If we
5313  // don't, then don't touch it, since that implies that we haven't
5314  // computed anything about its stack frame yet.
5315  MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
5316  if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
5317  MFI.getNumObjects() > 0)
5318  return UnknownCallOutlineType;
5319 
5320  // At this point, we can say that CalleeMF ought to not pass anything on the
5321  // stack. Therefore, we can outline it.
5323  }
5324 
5325  // Don't outline positions.
5326  if (MI.isPosition())
5328 
5329  // Don't touch the link register or W30.
5330  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
5331  MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
5333 
5334  // Does this use the stack?
5335  if (MI.modifiesRegister(AArch64::SP, &RI) ||
5336  MI.readsRegister(AArch64::SP, &RI)) {
5337  // True if there is no chance that any outlined candidate from this range
5338  // could require stack fixups. That is, both
5339  // * LR is available in the range (No save/restore around call)
5340  // * The range doesn't include calls (No save/restore in outlined frame)
5341  // are true.
5342  // FIXME: This is very restrictive; the flags check the whole block,
5343  // not just the bit we will try to outline.
5344  bool MightNeedStackFixUp =
5347 
5348  // If this instruction is in a range where it *never* needs to be fixed
5349  // up, then we can *always* outline it. This is true even if it's not
5350  // possible to fix that instruction up.
5351  //
5352  // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
5353  // use SP. Suppose that I1 sits within a range that definitely doesn't
5354  // need stack fixups, while I2 sits in a range that does.
5355  //
5356  // First, I1 can be outlined as long as we *never* fix up the stack in
5357  // any sequence containing it. I1 is already a safe instruction in the
5358  // original program, so as long as we don't modify it we're good to go.
5359  // So this leaves us with showing that outlining I2 won't break our
5360  // program.
5361  //
5362  // Suppose I1 and I2 belong to equivalent candidate sequences. When we
5363  // look at I2, we need to see if it can be fixed up. Suppose I2 (and
5364  // thus I1) cannot be fixed up. Then I2 will be assigned a unique
5365  // integer label; thus, I2 cannot belong to any candidate sequence (a
5366  // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
5367  // as well, so we're good. Thus, I1 is always safe to outline.
5368  //
5369  // This gives us two things: first off, it buys us some more instructions
5370  // for our search space by deeming stack instructions illegal only when
5371  // they can't be fixed up AND we might have to fix them up. Second off,
5372  // This allows us to catch tricky instructions like, say,
5373  // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
5374  // be paired with later SUBXris, which might *not* end up being outlined.
5375  // If we mess with the stack to save something, then an ADDXri messes with
5376  // it *after*, then we aren't going to restore the right something from
5377  // the stack if we don't outline the corresponding SUBXri first. ADDXris and
5378  // SUBXris are extremely common in prologue/epilogue code, so supporting
5379  // them in the outliner can be a pretty big win!
5380  if (!MightNeedStackFixUp)
5382 
5383  // Any modification of SP will break our code to save/restore LR.
5384  // FIXME: We could handle some instructions which add a constant offset to
5385  // SP, with a bit more work.
5386  if (MI.modifiesRegister(AArch64::SP, &RI))
5388 
5389  // At this point, we have a stack instruction that we might need to fix
5390  // up. We'll handle it if it's a load or store.
5391  if (MI.mayLoadOrStore()) {
5392  unsigned Base; // Filled with the base register of MI.
5393  int64_t Offset; // Filled with the offset of MI.
5394  unsigned DummyWidth;
5395 
5396  // Does it allow us to offset the base register and is the base SP?
5397  if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
5398  Base != AArch64::SP)
5400 
5401  // Find the minimum/maximum offset for this instruction and check if
5402  // fixing it up would be in range.
5403  int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
5404  unsigned Scale; // The scale to multiply the offsets by.
5405  getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
5406 
5407  // TODO: We should really test what happens if an instruction overflows.
5408  // This is tricky to test with IR tests, but when the outliner is moved
5409  // to a MIR test, it really ought to be checked.
5410  Offset += 16; // Update the offset to what it would be if we outlined.
5411  if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
5413 
5414  // It's in range, so we can outline it.
5416  }
5417 
5418  // FIXME: Add handling for instructions like "add x0, sp, #8".
5419 
5420  // We can't fix it up, so don't outline it.
5422  }
5423 
5425 }
5426 
5427 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5428  for (MachineInstr &MI : MBB) {
5429  unsigned Base, Width;
5430  int64_t Offset;
5431 
5432  // Is this a load or store with an immediate offset with SP as the base?
5433  if (!MI.mayLoadOrStore() ||
5434  !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
5435  Base != AArch64::SP)
5436  continue;
5437 
5438  // It is, so we have to fix it up.
5439  unsigned Scale;
5440  int64_t Dummy1, Dummy2;
5441 
5442  MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5443  assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5444  getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5445  assert(Scale != 0 && "Unexpected opcode!");
5446 
5447  // We've pushed the return address to the stack, so add 16 to the offset.
5448  // This is safe, since we already checked if it would overflow when we
5449  // checked if this instruction was legal to outline.
5450  int64_t NewImm = (Offset + 16) / Scale;
5451  StackOffsetOperand.setImm(NewImm);
5452  }
5453 }
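// Sketch of the fixup performed above (offsets illustrative): once the
// outlined frame has pushed LR with "str lr, [sp, #-16]!", an SP-relative
// access in the outlined body such as
//   ldr x0, [sp, #8]
// must become
//   ldr x0, [sp, #24]
// i.e. its byte offset grows by 16, which is what NewImm = (Offset + 16) /
// Scale computes for the scaled immediate forms.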
5454 
5455 void AArch64InstrInfo::buildOutlinedFrame(
5456  MachineBasicBlock &MBB, MachineFunction &MF,
5457  const outliner::OutlinedFunction &OF) const {
5458  // For thunk outlining, rewrite the last instruction from a call to a
5459  // tail-call.
5461  MachineInstr *Call = &*--MBB.instr_end();
5462  unsigned TailOpcode;
5463  if (Call->getOpcode() == AArch64::BL) {
5464  TailOpcode = AArch64::TCRETURNdi;
5465  } else {
5466  assert(Call->getOpcode() == AArch64::BLR);
5467  TailOpcode = AArch64::TCRETURNriALL;
5468  }
5469  MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
5470  .add(Call->getOperand(0))
5471  .addImm(0);
5472  MBB.insert(MBB.end(), TC);
5473  Call->eraseFromParent();
5474  }
5475 
5476  // Is there a call in the outlined range?
5477  auto IsNonTailCall = [](MachineInstr &MI) {
5478  return MI.isCall() && !MI.isReturn();
5479  };
5480  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
5481  // Fix up the instructions in the range, since we're going to modify the
5482  // stack.
5484  "Can only fix up stack references once");
5485  fixupPostOutline(MBB);
5486 
5487  // LR has to be a live in so that we can save it.
5488  MBB.addLiveIn(AArch64::LR);
5489 
5491  MachineBasicBlock::iterator Et = MBB.end();
5492 
5495  Et = std::prev(MBB.end());
5496 
5497  // Insert a save before the outlined region
5498  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5499  .addReg(AArch64::SP, RegState::Define)
5500  .addReg(AArch64::LR)
5501  .addReg(AArch64::SP)
5502  .addImm(-16);
5503  It = MBB.insert(It, STRXpre);
5504 
5505  const TargetSubtargetInfo &STI = MF.getSubtarget();
5506  const MCRegisterInfo *MRI = STI.getRegisterInfo();
5507  unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
5508 
5509  // Add a CFI saying the stack was moved 16 B down.
5510  int64_t StackPosEntry =
5512  BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5513  .addCFIIndex(StackPosEntry)
5515 
5516  // Add a CFI saying that the LR that we want to find is now 16 B higher than
5517  // before.
5518  int64_t LRPosEntry =
5519  MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
5520  BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5521  .addCFIIndex(LRPosEntry)
5523 
5524  // Insert a restore before the terminator for the function.
5525  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5526  .addReg(AArch64::SP, RegState::Define)
5527  .addReg(AArch64::LR, RegState::Define)
5528  .addReg(AArch64::SP)
5529  .addImm(16);
5530  Et = MBB.insert(Et, LDRXpost);
5531  }
5532 
5533  // If this is a tail call outlined function, then there's already a return.
5536  return;
5537 
5538  // It's not a tail call, so we have to insert the return ourselves.
5539  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5540  .addReg(AArch64::LR, RegState::Undef);
5541  MBB.insert(MBB.end(), ret);
5542 
5543  // Did we have to modify the stack by saving the link register?
5545  return;
5546 
5547  // We modified the stack.
5548  // Walk over the basic block and fix up all the stack accesses.
5549  fixupPostOutline(MBB);
5550 }
5551 
5552 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5553  Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
5554  MachineFunction &MF, const outliner::Candidate &C) const {
5555 
5556  // Are we tail calling?
5558  // If yes, then we can just branch to the label.
5559  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
5560  .addGlobalAddress(M.getNamedValue(MF.getName()))
5561  .addImm(0));
5562  return It;
5563  }
5564 
5565  // Are we saving the link register?
5568  // No, so just insert the call.
5569  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5570  .addGlobalAddress(M.getNamedValue(MF.getName())));
5571  return It;
5572  }
5573 
5574  // We want to return the spot where we inserted the call.
5576 
5577  // Instructions for saving and restoring LR around the call instruction we're
5578  // going to insert.
5579  MachineInstr *Save;
5580  MachineInstr *Restore;
5581  // Can we save to a register?
5583  // FIXME: This logic should be sunk into a target-specific interface so that
5584  // we don't have to recompute the register.
5585  unsigned Reg = findRegisterToSaveLRTo(C);
5586  assert(Reg != 0 && "No callee-saved register available?");
5587 
5588  // Save and restore LR from that register.
5589  Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
5590  .addReg(AArch64::XZR)
5591  .addReg(AArch64::LR)
5592  .addImm(0);
5593  Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
5594  .addReg(AArch64::XZR)
5595  .addReg(Reg)
5596  .addImm(0);
5597  } else {
5598  // We have the default case. Save and restore from SP.
5599  Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5600  .addReg(AArch64::SP, RegState::Define)
5601  .addReg(AArch64::LR)
5602  .addReg(AArch64::SP)
5603  .<