//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

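// These options are presumably here so that branch relaxation can be
// stress-tested without multi-megabyte functions; e.g. an invocation such as
// "llc -mtriple=aarch64 -aarch64-tbz-offset-bits=3" (illustrative, not from
// this file) forces TB[N]Z branches to be relaxed after only a few
// instructions.
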
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  }

  return NumBytes;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

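// Summary of the Cond encoding built above: a plain b.cc produces {cc};
// cbz/cbnz produce {-1, opcode, reg}, e.g. "cbz w0, bb" -> {-1, CBZW, w0};
// tbz/tbnz produce {-1, opcode, reg, bit}, e.g. "tbz w0, #3, bb" ->
// {-1, TBZW, w0, 3}. reverseBranchCondition() below relies on this layout.
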
static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

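// With the default widths these match the architectural ranges: TB[N]Z
// encodes a 14-bit signed word offset (roughly +/-32 KiB), CB[N]Z and Bcc get
// 19 bits (roughly +/-1 MiB), and returning 64 for B makes unconditional
// branches effectively unlimited for relaxation purposes.
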
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
    const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

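// Worked example (illustrative, not from the source): for a block ending in
//   b.eq %bb.1
//   b    %bb.2
// the analysis above returns false with TBB = %bb.1, FBB = %bb.2 and
// Cond = {EQ}; a block ending in a lone "b %bb.1" yields TBB = %bb.1 with
// Cond left empty.
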
bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

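// For instance (illustrative): reversing Cond = {-1, TBZW, w0, 3}, i.e.
// "tbz w0, #3", simply swaps the opcode to give {-1, TBNZW, w0, 3}, while a
// regular Bcc condition is inverted through getInvertedCondCode (EQ <-> NE,
// LT <-> GE, and so on).
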
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we add the whole MachineOperand (rather than using addReg) to
    // keep its flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

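// Example of the folding this enables (illustrative MIR, not from the
// source): given
//   %1 = ADDWri %0, 1, 0          ; %1 = %0 + 1
//   %2 = CSELWr %1, %3, eq
// insertSelect() below can emit "%2 = CSINCWr %3, %0, ne" instead, removing
// the explicit add on the path that selects the incremented value.
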
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       unsigned TrueReg, unsigned FalseReg,
                                       int &CondCycles, int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

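// For instance (illustrative): 0x00ff00ff is a valid 32-bit logical immediate
// (an 8-bit run of ones in a 16-bit repeating element), so a MOVi32imm of it
// can be emitted as a single "orr wD, wzr, #0x00ff00ff", whereas 0x12345678
// has no logical-immediate encoding and needs a MOVZ/MOVK sequence.
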
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
      return true;
    else
      return MI.isAsCheapAsAMove();
  }

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub with immediate and no shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
  // feature.
  case AArch64::FMOVH0:
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
  unsigned Reg, Imm, Shift;

  switch (MI.getOpcode()) {
  default:
    return false;

  // MOV Rd, SP
  case AArch64::ADDWri:
  case AArch64::ADDXri:
    if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
      return false;

    Reg = MI.getOperand(1).getReg();
    Imm = MI.getOperand(2).getImm();
    return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);

  // Literal
  case AArch64::ADR:
  case AArch64::ADRP:
    return true;

  // MOVI Vd, #0
  case AArch64::MOVID:
  case AArch64::MOVIv8b_ns:
  case AArch64::MOVIv2d_ns:
  case AArch64::MOVIv16b_ns:
    Imm = MI.getOperand(1).getImm();
    return (Imm == 0);

  // MOVI Vd, #0 (shifted)
  case AArch64::MOVIv2i32:
  case AArch64::MOVIv4i16:
  case AArch64::MOVIv4i32:
  case AArch64::MOVIv8i16:
    Imm = MI.getOperand(1).getImm();
    Shift = MI.getOperand(2).getImm();
    return (Imm == 0 && Shift == 0);

  // MOV Rd, Imm
  case AArch64::MOVNWi:
  case AArch64::MOVNXi:

  // MOV Rd, Imm
  case AArch64::MOVZWi:
  case AArch64::MOVZXi:
    return true;

  // MOV Rd, Imm
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    if (!MI.getOperand(1).isReg())
      return false;

    Reg = MI.getOperand(1).getReg();
    Imm = MI.getOperand(2).getImm();
    return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);

  // MOV Rd, Rm
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    if (!MI.getOperand(1).isReg())
      return false;

    Reg = MI.getOperand(1).getReg();
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getShiftValue(Imm);
    return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
  }
}

bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
  unsigned Imm, Shift;
  AArch64_AM::ShiftExtendType Ext;

  switch (MI.getOpcode()) {
  default:
    return false;

  // WriteI
  case AArch64::ADDSWri:
  case AArch64::ADDSXri:
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return true;

  // WriteISReg
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getShiftValue(Imm);
    Ext = AArch64_AM::getShiftType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));

  // WriteIEReg
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getArithShiftValue(Imm);
    Ext = AArch64_AM::getArithExtendType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));

  case AArch64::PRFMroW:
  case AArch64::PRFMroX:

  // WriteLDIdx
  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:

  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:

  // WriteSTIdx
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX:

  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
    Imm = MI.getOperand(3).getImm();
    Ext = AArch64_AM::getMemExtendType(Imm);
    return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
  }
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

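// Worked example (illustrative): with the same base register, "str x1, [x0]"
// (offset 0, width 8) and "str x2, [x0, #8]" (offset 8, width 8) satisfy
// LowOffset + LowWidth <= HighOffset, so the two stores are reported as
// trivially disjoint; "str x2, [x0, #4]" would overlap and fall through to
// return false.
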
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: CmpValue is reduced to 0 or 1 here, since it is only ever
    // compared against zero in optimizeCompareInstr.
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in optimizeCompareInstr.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}

// Constrain the register classes of Instr's operands to what its (possibly
// new) opcode requires; returns false if some operand cannot be constrained.
static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

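// The zero-register check above matters because the S and non-S encodings
// interpret that register slot differently: in "subs wzr, w0, #1" (a cmp) the
// destination field encodes WZR, but in the non-flag-setting "sub" the same
// field encodes WSP, so blindly dropping the S would silently target the
// stack pointer.
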
enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting at \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be truly a compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where the immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get opcode of S version of Instr.
/// If Instr is already an S version, its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

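// For example, getUsedNZCV(AArch64CC::GT) returns {Z, N, V}: GT is defined as
// "Z == 0 && N == V", so a substituted instruction may stand in for the
// compare only if it reproduces all three of those flags (C is not consulted
// for GT).
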
static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///        or if MI opcode is not the S form there must be neither defs of
///        flags nor uses of flags between MI and CmpInstr
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

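// Before/after sketch of the substitution (illustrative MIR, not from the
// source):
//   %1:gpr32 = SUBWri %0, 5, 0
//   %2:gpr32 = SUBSWri %1, 0, 0, implicit-def $nzcv   ; cmp %1, #0; %2 unused
// becomes
//   %1:gpr32 = SUBSWri %0, 5, 0, implicit-def $nzcv
// with the explicit compare erased, provided no later user needs C or V.
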
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

/// Return true if this instruction has a shifted-register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has an extended-register operand with a
/// non-zero extend/shift amount.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

1837 /// Return true if this is load/store scales or extends its register offset.
1838 /// This refers to scaling a dynamic index as opposed to scaled immediates.
1839 /// MI should be a memory op that allows scaled addressing.
1841  switch (MI.getOpcode()) {
1842  default:
1843  break;
1844  case AArch64::LDRBBroW:
1845  case AArch64::LDRBroW:
1846  case AArch64::LDRDroW:
1847  case AArch64::LDRHHroW:
1848  case AArch64::LDRHroW:
1849  case AArch64::LDRQroW:
1850  case AArch64::LDRSBWroW:
1851  case AArch64::LDRSBXroW:
1852  case AArch64::LDRSHWroW:
1853  case AArch64::LDRSHXroW:
1854  case AArch64::LDRSWroW:
1855  case AArch64::LDRSroW:
1856  case AArch64::LDRWroW:
1857  case AArch64::LDRXroW:
1858  case AArch64::STRBBroW:
1859  case AArch64::STRBroW:
1860  case AArch64::STRDroW:
1861  case AArch64::STRHHroW:
1862  case AArch64::STRHroW:
1863  case AArch64::STRQroW:
1864  case AArch64::STRSroW:
1865  case AArch64::STRWroW:
1866  case AArch64::STRXroW:
1867  case AArch64::LDRBBroX:
1868  case AArch64::LDRBroX:
1869  case AArch64::LDRDroX:
1870  case AArch64::LDRHHroX:
1871  case AArch64::LDRHroX:
1872  case AArch64::LDRQroX:
1873  case AArch64::LDRSBWroX:
1874  case AArch64::LDRSBXroX:
1875  case AArch64::LDRSHWroX:
1876  case AArch64::LDRSHXroX:
1877  case AArch64::LDRSWroX:
1878  case AArch64::LDRSroX:
1879  case AArch64::LDRWroX:
1880  case AArch64::LDRXroX:
1881  case AArch64::STRBBroX:
1882  case AArch64::STRBroX:
1883  case AArch64::STRDroX:
1884  case AArch64::STRHHroX:
1885  case AArch64::STRHroX:
1886  case AArch64::STRQroX:
1887  case AArch64::STRSroX:
1888  case AArch64::STRWroX:
1889  case AArch64::STRXroX:
1890 
1891  unsigned Val = MI.getOperand(3).getImm();
1892  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1893  return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1894  }
1895  return false;
1896 }
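// Illustration (annotation, not in the original source): for a register-
// offset load such as "ldr x0, [x1, w2, sxtw #3]" the extend type is SXTW
// and the shift bit is set, so this returns true; for "ldr x0, [x1, x2]"
// the offset register uses the default UXTX extend with no shift, so this
// returns false.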
1897 
1898 /// Check all MachineMemOperands for a hint to suppress pairing.
1899 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1900  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1901  return MMO->getFlags() & MOSuppressPair;
1902  });
1903 }
1904 
1905 /// Set a flag on the first MachineMemOperand to suppress pairing.
1906 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1907  if (MI.memoperands_empty())
1908  return;
1909  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1910 }
1911 
1912 /// Check all MachineMemOperands for a hint that the load/store is strided.
1913 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1914  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1915  return MMO->getFlags() & MOStridedAccess;
1916  });
1917 }
1918 
1919 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1920  switch (Opc) {
1921  default:
1922  return false;
1923  case AArch64::STURSi:
1924  case AArch64::STURDi:
1925  case AArch64::STURQi:
1926  case AArch64::STURBBi:
1927  case AArch64::STURHHi:
1928  case AArch64::STURWi:
1929  case AArch64::STURXi:
1930  case AArch64::LDURSi:
1931  case AArch64::LDURDi:
1932  case AArch64::LDURQi:
1933  case AArch64::LDURWi:
1934  case AArch64::LDURXi:
1935  case AArch64::LDURSWi:
1936  case AArch64::LDURHHi:
1937  case AArch64::LDURBBi:
1938  case AArch64::LDURSBWi:
1939  case AArch64::LDURSHWi:
1940  return true;
1941  }
1942 }
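// Background (annotation): the LDUR*/STUR* opcodes above encode a signed
// 9-bit byte offset in the range [-256, 255], while their scaled
// LDR*ui/STR*ui counterparts encode an unsigned 12-bit offset that is
// implicitly multiplied by the access size; that difference is what
// "unscaled" means here.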
1943 
1944 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1945  switch (MI.getOpcode()) {
1946  default:
1947  return false;
1948  // Scaled instructions.
1949  case AArch64::STRSui:
1950  case AArch64::STRDui:
1951  case AArch64::STRQui:
1952  case AArch64::STRXui:
1953  case AArch64::STRWui:
1954  case AArch64::LDRSui:
1955  case AArch64::LDRDui:
1956  case AArch64::LDRQui:
1957  case AArch64::LDRXui:
1958  case AArch64::LDRWui:
1959  case AArch64::LDRSWui:
1960  // Unscaled instructions.
1961  case AArch64::STURSi:
1962  case AArch64::STURDi:
1963  case AArch64::STURQi:
1964  case AArch64::STURWi:
1965  case AArch64::STURXi:
1966  case AArch64::LDURSi:
1967  case AArch64::LDURDi:
1968  case AArch64::LDURQi:
1969  case AArch64::LDURWi:
1970  case AArch64::LDURXi:
1971  case AArch64::LDURSWi:
1972  return true;
1973  }
1974 }
1975 
1976 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1977  bool &Is64Bit) {
1978  switch (Opc) {
1979  default:
1980  llvm_unreachable("Opcode has no flag setting equivalent!");
1981  // 32-bit cases:
1982  case AArch64::ADDWri:
1983  Is64Bit = false;
1984  return AArch64::ADDSWri;
1985  case AArch64::ADDWrr:
1986  Is64Bit = false;
1987  return AArch64::ADDSWrr;
1988  case AArch64::ADDWrs:
1989  Is64Bit = false;
1990  return AArch64::ADDSWrs;
1991  case AArch64::ADDWrx:
1992  Is64Bit = false;
1993  return AArch64::ADDSWrx;
1994  case AArch64::ANDWri:
1995  Is64Bit = false;
1996  return AArch64::ANDSWri;
1997  case AArch64::ANDWrr:
1998  Is64Bit = false;
1999  return AArch64::ANDSWrr;
2000  case AArch64::ANDWrs:
2001  Is64Bit = false;
2002  return AArch64::ANDSWrs;
2003  case AArch64::BICWrr:
2004  Is64Bit = false;
2005  return AArch64::BICSWrr;
2006  case AArch64::BICWrs:
2007  Is64Bit = false;
2008  return AArch64::BICSWrs;
2009  case AArch64::SUBWri:
2010  Is64Bit = false;
2011  return AArch64::SUBSWri;
2012  case AArch64::SUBWrr:
2013  Is64Bit = false;
2014  return AArch64::SUBSWrr;
2015  case AArch64::SUBWrs:
2016  Is64Bit = false;
2017  return AArch64::SUBSWrs;
2018  case AArch64::SUBWrx:
2019  Is64Bit = false;
2020  return AArch64::SUBSWrx;
2021  // 64-bit cases:
2022  case AArch64::ADDXri:
2023  Is64Bit = true;
2024  return AArch64::ADDSXri;
2025  case AArch64::ADDXrr:
2026  Is64Bit = true;
2027  return AArch64::ADDSXrr;
2028  case AArch64::ADDXrs:
2029  Is64Bit = true;
2030  return AArch64::ADDSXrs;
2031  case AArch64::ADDXrx:
2032  Is64Bit = true;
2033  return AArch64::ADDSXrx;
2034  case AArch64::ANDXri:
2035  Is64Bit = true;
2036  return AArch64::ANDSXri;
2037  case AArch64::ANDXrr:
2038  Is64Bit = true;
2039  return AArch64::ANDSXrr;
2040  case AArch64::ANDXrs:
2041  Is64Bit = true;
2042  return AArch64::ANDSXrs;
2043  case AArch64::BICXrr:
2044  Is64Bit = true;
2045  return AArch64::BICSXrr;
2046  case AArch64::BICXrs:
2047  Is64Bit = true;
2048  return AArch64::BICSXrs;
2049  case AArch64::SUBXri:
2050  Is64Bit = true;
2051  return AArch64::SUBSXri;
2052  case AArch64::SUBXrr:
2053  Is64Bit = true;
2054  return AArch64::SUBSXrr;
2055  case AArch64::SUBXrs:
2056  Is64Bit = true;
2057  return AArch64::SUBSXrs;
2058  case AArch64::SUBXrx:
2059  Is64Bit = true;
2060  return AArch64::SUBSXrx;
2061  }
2062 }
2063 
2064 // Is this a candidate for ld/st merging or pairing? For example, we don't
2065 // touch volatiles or load/stores that have a hint to avoid pair formation.
2066 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
2067  // If this is a volatile load/store, don't mess with it.
2068  if (MI.hasOrderedMemoryRef())
2069  return false;
2070 
2071  // Make sure this is a reg+imm (as opposed to an address reloc).
2072  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
2073  if (!MI.getOperand(2).isImm())
2074  return false;
2075 
2076  // Can't merge/pair if the instruction modifies the base register.
2077  // e.g., ldr x0, [x0]
2078  unsigned BaseReg = MI.getOperand(1).getReg();
2079  const TargetRegisterInfo *TRI = &getRegisterInfo();
2080  if (MI.modifiesRegister(BaseReg, TRI))
2081  return false;
2082 
2083  // Check if this load/store has a hint to avoid pair formation.
2084  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2085  if (isLdStPairSuppressed(MI))
2086  return false;
2087 
2088  // On some CPUs quad load/store pairs are slower than two single load/stores.
2089  if (Subtarget.isPaired128Slow()) {
2090  switch (MI.getOpcode()) {
2091  default:
2092  break;
2093  case AArch64::LDURQi:
2094  case AArch64::STURQi:
2095  case AArch64::LDRQui:
2096  case AArch64::STRQui:
2097  return false;
2098  }
2099  }
2100 
2101  return true;
2102 }
2103 
2104 bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
2105  MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
2106  const TargetRegisterInfo *TRI) const {
2107  unsigned Width;
2108  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
2109 }
2110 
2111 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
2112  MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
2113  const TargetRegisterInfo *TRI) const {
2114  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2115  // Handle only loads/stores with base register followed by immediate offset.
2116  if (LdSt.getNumExplicitOperands() == 3) {
2117  // Non-paired instruction (e.g., ldr x1, [x0, #8]).
2118  if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
2119  return false;
2120  } else if (LdSt.getNumExplicitOperands() == 4) {
2121  // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2122  if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
2123  !LdSt.getOperand(3).isImm())
2124  return false;
2125  } else
2126  return false;
2127 
2128  // Get the scaling factor for the instruction and set the width for the
2129  // memory operation.
2130  unsigned Scale = 0;
2131  int64_t Dummy1, Dummy2;
2132 
2133  // If this returns false, then it's an instruction we don't want to handle.
2134  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2135  return false;
2136 
2137  // Compute the offset. Offset is calculated as the immediate operand
2138  // multiplied by the scaling factor. Unscaled instructions have scaling factor
2139  // set to 1.
2140  if (LdSt.getNumExplicitOperands() == 3) {
2141  BaseReg = LdSt.getOperand(1).getReg();
2142  Offset = LdSt.getOperand(2).getImm() * Scale;
2143  } else {
2144  assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2145  BaseReg = LdSt.getOperand(2).getReg();
2146  Offset = LdSt.getOperand(3).getImm() * Scale;
2147  }
2148  return true;
2149 }
2150 
2151 MachineOperand &AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(
2152  MachineInstr &LdSt) const {
2153  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2154  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2155  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2156  return OfsOp;
2157 }
2158 
2159 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
2160  unsigned &Width, int64_t &MinOffset,
2161  int64_t &MaxOffset) const {
2162  switch (Opcode) {
2163  // Not a memory operation, or not one we want to handle.
2164  default:
2165  Scale = Width = 0;
2166  MinOffset = MaxOffset = 0;
2167  return false;
2168  case AArch64::STRWpost:
2169  case AArch64::LDRWpost:
2170  Width = 32;
2171  Scale = 4;
2172  MinOffset = -256;
2173  MaxOffset = 255;
2174  break;
2175  case AArch64::LDURQi:
2176  case AArch64::STURQi:
2177  Width = 16;
2178  Scale = 1;
2179  MinOffset = -256;
2180  MaxOffset = 255;
2181  break;
2182  case AArch64::LDURXi:
2183  case AArch64::LDURDi:
2184  case AArch64::STURXi:
2185  case AArch64::STURDi:
2186  Width = 8;
2187  Scale = 1;
2188  MinOffset = -256;
2189  MaxOffset = 255;
2190  break;
2191  case AArch64::LDURWi:
2192  case AArch64::LDURSi:
2193  case AArch64::LDURSWi:
2194  case AArch64::STURWi:
2195  case AArch64::STURSi:
2196  Width = 4;
2197  Scale = 1;
2198  MinOffset = -256;
2199  MaxOffset = 255;
2200  break;
2201  case AArch64::LDURHi:
2202  case AArch64::LDURHHi:
2203  case AArch64::LDURSHXi:
2204  case AArch64::LDURSHWi:
2205  case AArch64::STURHi:
2206  case AArch64::STURHHi:
2207  Width = 2;
2208  Scale = 1;
2209  MinOffset = -256;
2210  MaxOffset = 255;
2211  break;
2212  case AArch64::LDURBi:
2213  case AArch64::LDURBBi:
2214  case AArch64::LDURSBXi:
2215  case AArch64::LDURSBWi:
2216  case AArch64::STURBi:
2217  case AArch64::STURBBi:
2218  Width = 1;
2219  Scale = 1;
2220  MinOffset = -256;
2221  MaxOffset = 255;
2222  break;
2223  case AArch64::LDPQi:
2224  case AArch64::LDNPQi:
2225  case AArch64::STPQi:
2226  case AArch64::STNPQi:
2227  Scale = 16;
2228  Width = 32;
2229  MinOffset = -64;
2230  MaxOffset = 63;
2231  break;
2232  case AArch64::LDRQui:
2233  case AArch64::STRQui:
2234  Scale = Width = 16;
2235  MinOffset = 0;
2236  MaxOffset = 4095;
2237  break;
2238  case AArch64::LDPXi:
2239  case AArch64::LDPDi:
2240  case AArch64::LDNPXi:
2241  case AArch64::LDNPDi:
2242  case AArch64::STPXi:
2243  case AArch64::STPDi:
2244  case AArch64::STNPXi:
2245  case AArch64::STNPDi:
2246  Scale = 8;
2247  Width = 16;
2248  MinOffset = -64;
2249  MaxOffset = 63;
2250  break;
2251  case AArch64::LDRXui:
2252  case AArch64::LDRDui:
2253  case AArch64::STRXui:
2254  case AArch64::STRDui:
2255  Scale = Width = 8;
2256  MinOffset = 0;
2257  MaxOffset = 4095;
2258  break;
2259  case AArch64::LDPWi:
2260  case AArch64::LDPSi:
2261  case AArch64::LDNPWi:
2262  case AArch64::LDNPSi:
2263  case AArch64::STPWi:
2264  case AArch64::STPSi:
2265  case AArch64::STNPWi:
2266  case AArch64::STNPSi:
2267  Scale = 4;
2268  Width = 8;
2269  MinOffset = -64;
2270  MaxOffset = 63;
2271  break;
2272  case AArch64::LDRWui:
2273  case AArch64::LDRSui:
2274  case AArch64::LDRSWui:
2275  case AArch64::STRWui:
2276  case AArch64::STRSui:
2277  Scale = Width = 4;
2278  MinOffset = 0;
2279  MaxOffset = 4095;
2280  break;
2281  case AArch64::LDRHui:
2282  case AArch64::LDRHHui:
2283  case AArch64::STRHui:
2284  case AArch64::STRHHui:
2285  Scale = Width = 2;
2286  MinOffset = 0;
2287  MaxOffset = 4095;
2288  break;
2289  case AArch64::LDRBui:
2290  case AArch64::LDRBBui:
2291  case AArch64::STRBui:
2292  case AArch64::STRBBui:
2293  Scale = Width = 1;
2294  MinOffset = 0;
2295  MaxOffset = 4095;
2296  break;
2297  }
2298 
2299  return true;
2300 }
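// Worked example (annotation): for AArch64::LDRXui the table above gives
// Scale = Width = 8 with immediates 0..4095, so reachable byte offsets are
// 0, 8, ..., 32760; the unscaled AArch64::LDURXi instead reaches byte
// offsets -256..255 at any alignment.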
2301 
2302 // Scale an unscaled offset. Returns false if the unscaled offset can't be
2303 // scaled.
2304 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2305  unsigned OffsetStride = 1;
2306  switch (Opc) {
2307  default:
2308  return false;
2309  case AArch64::LDURQi:
2310  case AArch64::STURQi:
2311  OffsetStride = 16;
2312  break;
2313  case AArch64::LDURXi:
2314  case AArch64::LDURDi:
2315  case AArch64::STURXi:
2316  case AArch64::STURDi:
2317  OffsetStride = 8;
2318  break;
2319  case AArch64::LDURWi:
2320  case AArch64::LDURSi:
2321  case AArch64::LDURSWi:
2322  case AArch64::STURWi:
2323  case AArch64::STURSi:
2324  OffsetStride = 4;
2325  break;
2326  }
2327  // If the byte-offset isn't a multiple of the stride, we can't scale this
2328  // offset.
2329  if (Offset % OffsetStride != 0)
2330  return false;
2331 
2332  // Convert the byte offset used by unscaled instructions into an "element"
2333  // offset used by the scaled pair load/store instructions.
2334  Offset /= OffsetStride;
2335  return true;
2336 }
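// Worked example (annotation): for AArch64::STURXi the stride is 8, so a
// byte offset of 16 becomes element offset 2 (encodable by a pair
// instruction), while a byte offset of 12 is rejected because it is not a
// multiple of the stride.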
2337 
2338 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2339  if (FirstOpc == SecondOpc)
2340  return true;
2341  // We can also pair sign-ext and zero-ext instructions.
2342  switch (FirstOpc) {
2343  default:
2344  return false;
2345  case AArch64::LDRWui:
2346  case AArch64::LDURWi:
2347  return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2348  case AArch64::LDRSWui:
2349  case AArch64::LDURSWi:
2350  return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2351  }
2352  // These instructions can't be paired based on their opcodes.
2353  return false;
2354 }
2355 
2356 /// Detect opportunities for ldp/stp formation.
2357 ///
2358 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
2359 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
2360  unsigned BaseReg1,
2361  MachineInstr &SecondLdSt,
2362  unsigned BaseReg2,
2363  unsigned NumLoads) const {
2364  if (BaseReg1 != BaseReg2)
2365  return false;
2366 
2367  // Only cluster up to a single pair.
2368  if (NumLoads > 1)
2369  return false;
2370 
2371  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2372  return false;
2373 
2374  // Can we pair these instructions based on their opcodes?
2375  unsigned FirstOpc = FirstLdSt.getOpcode();
2376  unsigned SecondOpc = SecondLdSt.getOpcode();
2377  if (!canPairLdStOpc(FirstOpc, SecondOpc))
2378  return false;
2379 
2380  // Can't merge volatiles or load/stores that have a hint to avoid pair
2381  // formation, for example.
2382  if (!isCandidateToMergeOrPair(FirstLdSt) ||
2383  !isCandidateToMergeOrPair(SecondLdSt))
2384  return false;
2385 
2386  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2387  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2388  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2389  return false;
2390 
2391  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2392  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2393  return false;
2394 
2395  // Pairwise instructions have a 7-bit signed offset field.
2396  if (Offset1 > 63 || Offset1 < -64)
2397  return false;
2398 
2399  // The caller should already have ordered First/SecondLdSt by offset.
2400  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2401  return Offset1 + 1 == Offset2;
2402 }
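// Illustration (annotation): "ldr x1, [x0, #8]" and "ldr x2, [x0, #16]"
// share the base register x0 and have scaled offsets 1 and 2, so the checks
// above succeed and the two loads are candidates for forming
// "ldp x1, x2, [x0, #8]".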
2403 
2404 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2405  unsigned Reg, unsigned SubIdx,
2406  unsigned State,
2407  const TargetRegisterInfo *TRI) {
2408  if (!SubIdx)
2409  return MIB.addReg(Reg, State);
2410 
2411  if (TargetRegisterInfo::isPhysicalRegister(Reg))
2412  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2413  return MIB.addReg(Reg, State, SubIdx);
2414 }
2415 
2416 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2417  unsigned NumRegs) {
2418  // We really want the positive remainder mod 32 here; that happens to be
2419  // easily obtainable with a mask.
2420  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2421 }
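// Worked example (annotation): when copying the tuple D0_D1 into D1_D2,
// DestReg - SrcReg == 1, which is less than NumRegs == 2, so a forward copy
// would clobber D1 before it is read; copyPhysRegTuple below iterates over
// the sub-registers in reverse order in that case.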
2422 
2423 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2424  MachineBasicBlock::iterator I,
2425  const DebugLoc &DL, unsigned DestReg,
2426  unsigned SrcReg, bool KillSrc,
2427  unsigned Opcode,
2428  ArrayRef<unsigned> Indices) const {
2429  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2430  const TargetRegisterInfo *TRI = &getRegisterInfo();
2431  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2432  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2433  unsigned NumRegs = Indices.size();
2434 
2435  int SubReg = 0, End = NumRegs, Incr = 1;
2436  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2437  SubReg = NumRegs - 1;
2438  End = -1;
2439  Incr = -1;
2440  }
2441 
2442  for (; SubReg != End; SubReg += Incr) {
2443  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2444  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2445  AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2446  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2447  }
2448 }
2449 
2450 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2451  MachineBasicBlock::iterator I,
2452  const DebugLoc &DL, unsigned DestReg,
2453  unsigned SrcReg, bool KillSrc) const {
2454  if (AArch64::GPR32spRegClass.contains(DestReg) &&
2455  (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2456  const TargetRegisterInfo *TRI = &getRegisterInfo();
2457 
2458  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2459  // If either operand is WSP, expand to ADD #0.
2460  if (Subtarget.hasZeroCycleRegMove()) {
2461  // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2462  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2463  &AArch64::GPR64spRegClass);
2464  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2465  &AArch64::GPR64spRegClass);
2466  // This instruction is reading and writing X registers. This may upset
2467  // the register scavenger and machine verifier, so we need to indicate
2468  // that we are reading an undefined value from SrcRegX, but a proper
2469  // value from SrcReg.
2470  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2471  .addReg(SrcRegX, RegState::Undef)
2472  .addImm(0)
2473  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2474  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2475  } else {
2476  BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2477  .addReg(SrcReg, getKillRegState(KillSrc))
2478  .addImm(0)
2479  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2480  }
2481  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
2482  BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2483  .addImm(0)
2484  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2485  } else {
2486  if (Subtarget.hasZeroCycleRegMove()) {
2487  // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2488  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2489  &AArch64::GPR64spRegClass);
2490  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2491  &AArch64::GPR64spRegClass);
2492  // This instruction is reading and writing X registers. This may upset
2493  // the register scavenger and machine verifier, so we need to indicate
2494  // that we are reading an undefined value from SrcRegX, but a proper
2495  // value from SrcReg.
2496  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2497  .addReg(AArch64::XZR)
2498  .addReg(SrcRegX, RegState::Undef)
2499  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2500  } else {
2501  // Otherwise, expand to ORR WZR.
2502  BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2503  .addReg(AArch64::WZR)
2504  .addReg(SrcReg, getKillRegState(KillSrc));
2505  }
2506  }
2507  return;
2508  }
2509 
2510  if (AArch64::GPR64spRegClass.contains(DestReg) &&
2511  (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2512  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2513  // If either operand is SP, expand to ADD #0.
2514  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2515  .addReg(SrcReg, getKillRegState(KillSrc))
2516  .addImm(0)
2517  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2518  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
2519  BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2520  .addImm(0)
2521  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2522  } else {
2523  // Otherwise, expand to ORR XZR.
2524  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2525  .addReg(AArch64::XZR)
2526  .addReg(SrcReg, getKillRegState(KillSrc));
2527  }
2528  return;
2529  }
2530 
2531  // Copy a DDDD register quad by copying the individual sub-registers.
2532  if (AArch64::DDDDRegClass.contains(DestReg) &&
2533  AArch64::DDDDRegClass.contains(SrcReg)) {
2534  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2535  AArch64::dsub2, AArch64::dsub3};
2536  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2537  Indices);
2538  return;
2539  }
2540 
2541  // Copy a DDD register triple by copying the individual sub-registers.
2542  if (AArch64::DDDRegClass.contains(DestReg) &&
2543  AArch64::DDDRegClass.contains(SrcReg)) {
2544  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2545  AArch64::dsub2};
2546  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2547  Indices);
2548  return;
2549  }
2550 
2551  // Copy a DD register pair by copying the individual sub-registers.
2552  if (AArch64::DDRegClass.contains(DestReg) &&
2553  AArch64::DDRegClass.contains(SrcReg)) {
2554  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2555  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2556  Indices);
2557  return;
2558  }
2559 
2560  // Copy a QQQQ register quad by copying the individual sub-registers.
2561  if (AArch64::QQQQRegClass.contains(DestReg) &&
2562  AArch64::QQQQRegClass.contains(SrcReg)) {
2563  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2564  AArch64::qsub2, AArch64::qsub3};
2565  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2566  Indices);
2567  return;
2568  }
2569 
2570  // Copy a QQQ register triple by copying the individual sub-registers.
2571  if (AArch64::QQQRegClass.contains(DestReg) &&
2572  AArch64::QQQRegClass.contains(SrcReg)) {
2573  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2574  AArch64::qsub2};
2575  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2576  Indices);
2577  return;
2578  }
2579 
2580  // Copy a QQ register pair by copying the individual sub-registers.
2581  if (AArch64::QQRegClass.contains(DestReg) &&
2582  AArch64::QQRegClass.contains(SrcReg)) {
2583  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2584  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2585  Indices);
2586  return;
2587  }
2588 
2589  if (AArch64::FPR128RegClass.contains(DestReg) &&
2590  AArch64::FPR128RegClass.contains(SrcReg)) {
2591  if (Subtarget.hasNEON()) {
2592  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2593  .addReg(SrcReg)
2594  .addReg(SrcReg, getKillRegState(KillSrc));
2595  } else {
2596  BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2597  .addReg(AArch64::SP, RegState::Define)
2598  .addReg(SrcReg, getKillRegState(KillSrc))
2599  .addReg(AArch64::SP)
2600  .addImm(-16);
2601  BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2602  .addReg(AArch64::SP, RegState::Define)
2603  .addReg(DestReg, RegState::Define)
2604  .addReg(AArch64::SP)
2605  .addImm(16);
2606  }
2607  return;
2608  }
2609 
2610  if (AArch64::FPR64RegClass.contains(DestReg) &&
2611  AArch64::FPR64RegClass.contains(SrcReg)) {
2612  if (Subtarget.hasNEON()) {
2613  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2614  &AArch64::FPR128RegClass);
2615  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2616  &AArch64::FPR128RegClass);
2617  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2618  .addReg(SrcReg)
2619  .addReg(SrcReg, getKillRegState(KillSrc));
2620  } else {
2621  BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2622  .addReg(SrcReg, getKillRegState(KillSrc));
2623  }
2624  return;
2625  }
2626 
2627  if (AArch64::FPR32RegClass.contains(DestReg) &&
2628  AArch64::FPR32RegClass.contains(SrcReg)) {
2629  if (Subtarget.hasNEON()) {
2630  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2631  &AArch64::FPR128RegClass);
2632  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2633  &AArch64::FPR128RegClass);
2634  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2635  .addReg(SrcReg)
2636  .addReg(SrcReg, getKillRegState(KillSrc));
2637  } else {
2638  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2639  .addReg(SrcReg, getKillRegState(KillSrc));
2640  }
2641  return;
2642  }
2643 
2644  if (AArch64::FPR16RegClass.contains(DestReg) &&
2645  AArch64::FPR16RegClass.contains(SrcReg)) {
2646  if (Subtarget.hasNEON()) {
2647  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2648  &AArch64::FPR128RegClass);
2649  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2650  &AArch64::FPR128RegClass);
2651  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2652  .addReg(SrcReg)
2653  .addReg(SrcReg, getKillRegState(KillSrc));
2654  } else {
2655  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2656  &AArch64::FPR32RegClass);
2657  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2658  &AArch64::FPR32RegClass);
2659  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2660  .addReg(SrcReg, getKillRegState(KillSrc));
2661  }
2662  return;
2663  }
2664 
2665  if (AArch64::FPR8RegClass.contains(DestReg) &&
2666  AArch64::FPR8RegClass.contains(SrcReg)) {
2667  if (Subtarget.hasNEON()) {
2668  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2669  &AArch64::FPR128RegClass);
2670  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2671  &AArch64::FPR128RegClass);
2672  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2673  .addReg(SrcReg)
2674  .addReg(SrcReg, getKillRegState(KillSrc));
2675  } else {
2676  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2677  &AArch64::FPR32RegClass);
2678  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2679  &AArch64::FPR32RegClass);
2680  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2681  .addReg(SrcReg, getKillRegState(KillSrc));
2682  }
2683  return;
2684  }
2685 
2686  // Copies between GPR64 and FPR64.
2687  if (AArch64::FPR64RegClass.contains(DestReg) &&
2688  AArch64::GPR64RegClass.contains(SrcReg)) {
2689  BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2690  .addReg(SrcReg, getKillRegState(KillSrc));
2691  return;
2692  }
2693  if (AArch64::GPR64RegClass.contains(DestReg) &&
2694  AArch64::FPR64RegClass.contains(SrcReg)) {
2695  BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2696  .addReg(SrcReg, getKillRegState(KillSrc));
2697  return;
2698  }
2699  // Copies between GPR32 and FPR32.
2700  if (AArch64::FPR32RegClass.contains(DestReg) &&
2701  AArch64::GPR32RegClass.contains(SrcReg)) {
2702  BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2703  .addReg(SrcReg, getKillRegState(KillSrc));
2704  return;
2705  }
2706  if (AArch64::GPR32RegClass.contains(DestReg) &&
2707  AArch64::FPR32RegClass.contains(SrcReg)) {
2708  BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2709  .addReg(SrcReg, getKillRegState(KillSrc));
2710  return;
2711  }
2712 
2713  if (DestReg == AArch64::NZCV) {
2714  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2715  BuildMI(MBB, I, DL, get(AArch64::MSR))
2716  .addImm(AArch64SysReg::NZCV)
2717  .addReg(SrcReg, getKillRegState(KillSrc))
2718  .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2719  return;
2720  }
2721 
2722  if (SrcReg == AArch64::NZCV) {
2723  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2724  BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2725  .addImm(AArch64SysReg::NZCV)
2726  .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2727  return;
2728  }
2729 
2730  llvm_unreachable("unimplemented reg-to-reg copy");
2731 }
2732 
2733 void AArch64InstrInfo::storeRegToStackSlot(
2734  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2735  bool isKill, int FI, const TargetRegisterClass *RC,
2736  const TargetRegisterInfo *TRI) const {
2737  DebugLoc DL;
2738  if (MBBI != MBB.end())
2739  DL = MBBI->getDebugLoc();
2740  MachineFunction &MF = *MBB.getParent();
2741  MachineFrameInfo &MFI = MF.getFrameInfo();
2742  unsigned Align = MFI.getObjectAlignment(FI);
2743 
2744  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2745  MachineMemOperand *MMO = MF.getMachineMemOperand(
2746  PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2747  unsigned Opc = 0;
2748  bool Offset = true;
2749  switch (TRI->getSpillSize(*RC)) {
2750  case 1:
2751  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2752  Opc = AArch64::STRBui;
2753  break;
2754  case 2:
2755  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2756  Opc = AArch64::STRHui;
2757  break;
2758  case 4:
2759  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2760  Opc = AArch64::STRWui;
2761  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2762  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2763  else
2764  assert(SrcReg != AArch64::WSP);
2765  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2766  Opc = AArch64::STRSui;
2767  break;
2768  case 8:
2769  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2770  Opc = AArch64::STRXui;
2771  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2772  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2773  else
2774  assert(SrcReg != AArch64::SP);
2775  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2776  Opc = AArch64::STRDui;
2777  break;
2778  case 16:
2779  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2780  Opc = AArch64::STRQui;
2781  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2782  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2783  Opc = AArch64::ST1Twov1d;
2784  Offset = false;
2785  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2786  BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
2787  .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
2788  getKillRegState(isKill))
2789  .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
2790  getKillRegState(isKill))
2791  .addFrameIndex(FI)
2792  .addImm(0)
2793  .addMemOperand(MMO);
2794  return;
2795  }
2796  break;
2797  case 24:
2798  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2799  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2800  Opc = AArch64::ST1Threev1d;
2801  Offset = false;
2802  }
2803  break;
2804  case 32:
2805  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2806  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2807  Opc = AArch64::ST1Fourv1d;
2808  Offset = false;
2809  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2810  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2811  Opc = AArch64::ST1Twov2d;
2812  Offset = false;
2813  }
2814  break;
2815  case 48:
2816  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2817  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2818  Opc = AArch64::ST1Threev2d;
2819  Offset = false;
2820  }
2821  break;
2822  case 64:
2823  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2824  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2825  Opc = AArch64::ST1Fourv2d;
2826  Offset = false;
2827  }
2828  break;
2829  }
2830  assert(Opc && "Unknown register class");
2831 
2832  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2833  .addReg(SrcReg, getKillRegState(isKill))
2834  .addFrameIndex(FI);
2835 
2836  if (Offset)
2837  MI.addImm(0);
2838  MI.addMemOperand(MMO);
2839 }
2840 
2841 void AArch64InstrInfo::loadRegFromStackSlot(
2842  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2843  int FI, const TargetRegisterClass *RC,
2844  const TargetRegisterInfo *TRI) const {
2845  DebugLoc DL;
2846  if (MBBI != MBB.end())
2847  DL = MBBI->getDebugLoc();
2848  MachineFunction &MF = *MBB.getParent();
2849  MachineFrameInfo &MFI = MF.getFrameInfo();
2850  unsigned Align = MFI.getObjectAlignment(FI);
2851  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2852  MachineMemOperand *MMO = MF.getMachineMemOperand(
2853  PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2854 
2855  unsigned Opc = 0;
2856  bool Offset = true;
2857  switch (TRI->getSpillSize(*RC)) {
2858  case 1:
2859  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2860  Opc = AArch64::LDRBui;
2861  break;
2862  case 2:
2863  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2864  Opc = AArch64::LDRHui;
2865  break;
2866  case 4:
2867  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2868  Opc = AArch64::LDRWui;
2869  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2870  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2871  else
2872  assert(DestReg != AArch64::WSP);
2873  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2874  Opc = AArch64::LDRSui;
2875  break;
2876  case 8:
2877  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2878  Opc = AArch64::LDRXui;
2879  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2880  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2881  else
2882  assert(DestReg != AArch64::SP);
2883  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2884  Opc = AArch64::LDRDui;
2885  break;
2886  case 16:
2887  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2888  Opc = AArch64::LDRQui;
2889  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2890  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2891  Opc = AArch64::LD1Twov1d;
2892  Offset = false;
2893  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2894  BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
2895  .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
2896  getDefRegState(true))
2897  .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
2898  getDefRegState(true))
2899  .addFrameIndex(FI)
2900  .addImm(0)
2901  .addMemOperand(MMO);
2902  return;
2903  }
2904  break;
2905  case 24:
2906  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2907  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2908  Opc = AArch64::LD1Threev1d;
2909  Offset = false;
2910  }
2911  break;
2912  case 32:
2913  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2914  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2915  Opc = AArch64::LD1Fourv1d;
2916  Offset = false;
2917  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2918  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2919  Opc = AArch64::LD1Twov2d;
2920  Offset = false;
2921  }
2922  break;
2923  case 48:
2924  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2925  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2926  Opc = AArch64::LD1Threev2d;
2927  Offset = false;
2928  }
2929  break;
2930  case 64:
2931  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2932  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2933  Opc = AArch64::LD1Fourv2d;
2934  Offset = false;
2935  }
2936  break;
2937  }
2938  assert(Opc && "Unknown register class");
2939 
2940  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2941  .addReg(DestReg, getDefRegState(true))
2942  .addFrameIndex(FI);
2943  if (Offset)
2944  MI.addImm(0);
2945  MI.addMemOperand(MMO);
2946 }
2947 
2948 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2949  MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2950  unsigned DestReg, unsigned SrcReg, int Offset,
2951  const TargetInstrInfo *TII,
2952  MachineInstr::MIFlag Flag, bool SetNZCV) {
2953  if (DestReg == SrcReg && Offset == 0)
2954  return;
2955 
2956  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2957  "SP increment/decrement not 16-byte aligned");
2958 
2959  bool isSub = Offset < 0;
2960  if (isSub)
2961  Offset = -Offset;
2962 
2963  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2964  // scratch register. If DestReg is a virtual register, use it as the
2965  // scratch register; otherwise, create a new virtual register (to be
2966  // replaced by the scavenger at the end of PEI). That case can be optimized
2967  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2968  // register can be loaded with offset%8 and the add/sub can use an extending
2969  // instruction with LSL#3.
2970  // Currently the function handles any offsets but generates a poor sequence
2971  // of code.
2972  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2973 
2974  unsigned Opc;
2975  if (SetNZCV)
2976  Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2977  else
2978  Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2979  const unsigned MaxEncoding = 0xfff;
2980  const unsigned ShiftSize = 12;
2981  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2982  while (((unsigned)Offset) >= (1 << ShiftSize)) {
2983  unsigned ThisVal;
2984  if (((unsigned)Offset) > MaxEncodableValue) {
2985  ThisVal = MaxEncodableValue;
2986  } else {
2987  ThisVal = Offset & MaxEncodableValue;
2988  }
2989  assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2990  "Encoding cannot handle value that big");
2991  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2992  .addReg(SrcReg)
2993  .addImm(ThisVal >> ShiftSize)
2994  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2995  .setMIFlag(Flag);
2996 
2997  SrcReg = DestReg;
2998  Offset -= ThisVal;
2999  if (Offset == 0)
3000  return;
3001  }
3002  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3003  .addReg(SrcReg)
3004  .addImm(Offset)
3005  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
3006  .setMIFlag(Flag);
3007 }
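// Worked example (annotation): for Offset == 8208 the loop above first
// emits "add Xd, Xn, #2, lsl #12" (adding 8192) and the trailing BuildMI
// emits "add Xd, Xd, #16", since a single ADDXri immediate is limited to
// 12 bits with an optional left shift of 12.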
3008 
3009 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3010  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3011  MachineBasicBlock::iterator InsertPt, int FrameIndex,
3012  LiveIntervals *LIS) const {
3013  // This is a bit of a hack. Consider this instruction:
3014  //
3015  // %0 = COPY %sp; GPR64all:%0
3016  //
3017  // We explicitly chose GPR64all for the virtual register so such a copy might
3018  // be eliminated by RegisterCoalescer. However, that may not be possible, and
3019  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3020  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3021  //
3022  // To prevent that, we are going to constrain the %0 register class here.
3023  //
3024  // <rdar://problem/11522048>
3025  //
3026  if (MI.isFullCopy()) {
3027  unsigned DstReg = MI.getOperand(0).getReg();
3028  unsigned SrcReg = MI.getOperand(1).getReg();
3029  if (SrcReg == AArch64::SP &&
3030  TargetRegisterInfo::isVirtualRegister(DstReg)) {
3031  MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3032  return nullptr;
3033  }
3034  if (DstReg == AArch64::SP &&
3035  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
3036  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3037  return nullptr;
3038  }
3039  }
3040 
3041  // Handle the case where a copy is being spilled or filled but the source
3042  // and destination register class don't match. For example:
3043  //
3044  // %0 = COPY %xzr; GPR64common:%0
3045  //
3046  // In this case we can still safely fold away the COPY and generate the
3047  // following spill code:
3048  //
3049  // STRXui %xzr, %stack.0
3050  //
3051  // This also eliminates spilled cross register class COPYs (e.g. between x and
3052  // d regs) of the same size. For example:
3053  //
3054  // %0 = COPY %1; GPR64:%0, FPR64:%1
3055  //
3056  // will be filled as
3057  //
3058  // LDRDui %0, fi<#0>
3059  //
3060  // instead of
3061  //
3062  // LDRXui %Temp, fi<#0>
3063  // %0 = FMOV %Temp
3064  //
3065  if (MI.isCopy() && Ops.size() == 1 &&
3066  // Make sure we're only folding the explicit COPY defs/uses.
3067  (Ops[0] == 0 || Ops[0] == 1)) {
3068  bool IsSpill = Ops[0] == 0;
3069  bool IsFill = !IsSpill;
3070  const TargetRegisterInfo &TRI = getRegisterInfo();
3071  const MachineRegisterInfo &MRI = MF.getRegInfo();
3072  MachineBasicBlock &MBB = *MI.getParent();
3073  const MachineOperand &DstMO = MI.getOperand(0);
3074  const MachineOperand &SrcMO = MI.getOperand(1);
3075  unsigned DstReg = DstMO.getReg();
3076  unsigned SrcReg = SrcMO.getReg();
3077  // This is slightly expensive to compute for physical regs since
3078  // getMinimalPhysRegClass is slow.
3079  auto getRegClass = [&](unsigned Reg) {
3080  return TargetRegisterInfo::isVirtualRegister(Reg)
3081  ? MRI.getRegClass(Reg)
3082  : TRI.getMinimalPhysRegClass(Reg);
3083  };
3084 
3085  if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3086  assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3087  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3088  "Mismatched register size in non subreg COPY");
3089  if (IsSpill)
3090  storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3091  getRegClass(SrcReg), &TRI);
3092  else
3093  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3094  getRegClass(DstReg), &TRI);
3095  return &*--InsertPt;
3096  }
3097 
3098  // Handle cases like spilling def of:
3099  //
3100  // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3101  //
3102  // where the physical register source can be widened and stored to the full
3103  // virtual reg destination stack slot, in this case producing:
3104  //
3105  // STRXui %xzr, %stack.0
3106  //
3107  if (IsSpill && DstMO.isUndef() &&
3108  TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
3109  assert(SrcMO.getSubReg() == 0 &&
3110  "Unexpected subreg on physical register");
3111  const TargetRegisterClass *SpillRC;
3112  unsigned SpillSubreg;
3113  switch (DstMO.getSubReg()) {
3114  default:
3115  SpillRC = nullptr;
3116  break;
3117  case AArch64::sub_32:
3118  case AArch64::ssub:
3119  if (AArch64::GPR32RegClass.contains(SrcReg)) {
3120  SpillRC = &AArch64::GPR64RegClass;
3121  SpillSubreg = AArch64::sub_32;
3122  } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3123  SpillRC = &AArch64::FPR64RegClass;
3124  SpillSubreg = AArch64::ssub;
3125  } else
3126  SpillRC = nullptr;
3127  break;
3128  case AArch64::dsub:
3129  if (AArch64::FPR64RegClass.contains(SrcReg)) {
3130  SpillRC = &AArch64::FPR128RegClass;
3131  SpillSubreg = AArch64::dsub;
3132  } else
3133  SpillRC = nullptr;
3134  break;
3135  }
3136 
3137  if (SpillRC)
3138  if (unsigned WidenedSrcReg =
3139  TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3140  storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3141  FrameIndex, SpillRC, &TRI);
3142  return &*--InsertPt;
3143  }
3144  }
3145 
3146  // Handle cases like filling use of:
3147  //
3148  // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3149  //
3150  // where we can load the full virtual reg source stack slot, into the subreg
3151  // destination, in this case producing:
3152  //
3153  // LDRWui %0:sub_32<def,read-undef>, %stack.0
3154  //
3155  if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3156  const TargetRegisterClass *FillRC;
3157  switch (DstMO.getSubReg()) {
3158  default:
3159  FillRC = nullptr;
3160  break;
3161  case AArch64::sub_32:
3162  FillRC = &AArch64::GPR32RegClass;
3163  break;
3164  case AArch64::ssub:
3165  FillRC = &AArch64::FPR32RegClass;
3166  break;
3167  case AArch64::dsub:
3168  FillRC = &AArch64::FPR64RegClass;
3169  break;
3170  }
3171 
3172  if (FillRC) {
3173  assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3174  TRI.getRegSizeInBits(*FillRC) &&
3175  "Mismatched regclass size on folded subreg COPY");
3176  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3177  MachineInstr &LoadMI = *--InsertPt;
3178  MachineOperand &LoadDst = LoadMI.getOperand(0);
3179  assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3180  LoadDst.setSubReg(DstMO.getSubReg());
3181  LoadDst.setIsUndef();
3182  return &LoadMI;
3183  }
3184  }
3185  }
3186 
3187  // Cannot fold.
3188  return nullptr;
3189 }
3190 
3191 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
3192  bool *OutUseUnscaledOp,
3193  unsigned *OutUnscaledOp,
3194  int *EmittableOffset) {
3195  int Scale = 1;
3196  bool IsSigned = false;
3197  // The ImmIdx should be changed case by case if it is not 2.
3198  unsigned ImmIdx = 2;
3199  unsigned UnscaledOp = 0;
3200  // Set output values in case of early exit.
3201  if (EmittableOffset)
3202  *EmittableOffset = 0;
3203  if (OutUseUnscaledOp)
3204  *OutUseUnscaledOp = false;
3205  if (OutUnscaledOp)
3206  *OutUnscaledOp = 0;
3207  switch (MI.getOpcode()) {
3208  default:
3209  llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
3210  // Vector spills/fills can't take an immediate offset.
3211  case AArch64::LD1Twov2d:
3212  case AArch64::LD1Threev2d:
3213  case AArch64::LD1Fourv2d:
3214  case AArch64::LD1Twov1d:
3215  case AArch64::LD1Threev1d:
3216  case AArch64::LD1Fourv1d:
3217  case AArch64::ST1Twov2d:
3218  case AArch64::ST1Threev2d:
3219  case AArch64::ST1Fourv2d:
3220  case AArch64::ST1Twov1d:
3221  case AArch64::ST1Threev1d:
3222  case AArch64::ST1Fourv1d:
3223  return AArch64FrameOffsetCannotUpdate;
3224  case AArch64::PRFMui:
3225  Scale = 8;
3226  UnscaledOp = AArch64::PRFUMi;
3227  break;
3228  case AArch64::LDRXui:
3229  Scale = 8;
3230  UnscaledOp = AArch64::LDURXi;
3231  break;
3232  case AArch64::LDRWui:
3233  Scale = 4;
3234  UnscaledOp = AArch64::LDURWi;
3235  break;
3236  case AArch64::LDRBui:
3237  Scale = 1;
3238  UnscaledOp = AArch64::LDURBi;
3239  break;
3240  case AArch64::LDRHui:
3241  Scale = 2;
3242  UnscaledOp = AArch64::LDURHi;
3243  break;
3244  case AArch64::LDRSui:
3245  Scale = 4;
3246  UnscaledOp = AArch64::LDURSi;
3247  break;
3248  case AArch64::LDRDui:
3249  Scale = 8;
3250  UnscaledOp = AArch64::LDURDi;
3251  break;
3252  case AArch64::LDRQui:
3253  Scale = 16;
3254  UnscaledOp = AArch64::LDURQi;
3255  break;
3256  case AArch64::LDRBBui:
3257  Scale = 1;
3258  UnscaledOp = AArch64::LDURBBi;
3259  break;
3260  case AArch64::LDRHHui:
3261  Scale = 2;
3262  UnscaledOp = AArch64::LDURHHi;
3263  break;
3264  case AArch64::LDRSBXui:
3265  Scale = 1;
3266  UnscaledOp = AArch64::LDURSBXi;
3267  break;
3268  case AArch64::LDRSBWui:
3269  Scale = 1;
3270  UnscaledOp = AArch64::LDURSBWi;
3271  break;
3272  case AArch64::LDRSHXui:
3273  Scale = 2;
3274  UnscaledOp = AArch64::LDURSHXi;
3275  break;
3276  case AArch64::LDRSHWui:
3277  Scale = 2;
3278  UnscaledOp = AArch64::LDURSHWi;
3279  break;
3280  case AArch64::LDRSWui:
3281  Scale = 4;
3282  UnscaledOp = AArch64::LDURSWi;
3283  break;
3284 
3285  case AArch64::STRXui:
3286  Scale = 8;
3287  UnscaledOp = AArch64::STURXi;
3288  break;
3289  case AArch64::STRWui:
3290  Scale = 4;
3291  UnscaledOp = AArch64::STURWi;
3292  break;
3293  case AArch64::STRBui:
3294  Scale = 1;
3295  UnscaledOp = AArch64::STURBi;
3296  break;
3297  case AArch64::STRHui:
3298  Scale = 2;
3299  UnscaledOp = AArch64::STURHi;
3300  break;
3301  case AArch64::STRSui:
3302  Scale = 4;
3303  UnscaledOp = AArch64::STURSi;
3304  break;
3305  case AArch64::STRDui:
3306  Scale = 8;
3307  UnscaledOp = AArch64::STURDi;
3308  break;
3309  case AArch64::STRQui:
3310  Scale = 16;
3311  UnscaledOp = AArch64::STURQi;
3312  break;
3313  case AArch64::STRBBui:
3314  Scale = 1;
3315  UnscaledOp = AArch64::STURBBi;
3316  break;
3317  case AArch64::STRHHui:
3318  Scale = 2;
3319  UnscaledOp = AArch64::STURHHi;
3320  break;
3321 
3322  case AArch64::LDPXi:
3323  case AArch64::LDPDi:
3324  case AArch64::STPXi:
3325  case AArch64::STPDi:
3326  case AArch64::LDNPXi:
3327  case AArch64::LDNPDi:
3328  case AArch64::STNPXi:
3329  case AArch64::STNPDi:
3330  ImmIdx = 3;
3331  IsSigned = true;
3332  Scale = 8;
3333  break;
3334  case AArch64::LDPQi:
3335  case AArch64::STPQi:
3336  case AArch64::LDNPQi:
3337  case AArch64::STNPQi:
3338  ImmIdx = 3;
3339  IsSigned = true;
3340  Scale = 16;
3341  break;
3342  case AArch64::LDPWi:
3343  case AArch64::LDPSi:
3344  case AArch64::STPWi:
3345  case AArch64::STPSi:
3346  case AArch64::LDNPWi:
3347  case AArch64::LDNPSi:
3348  case AArch64::STNPWi:
3349  case AArch64::STNPSi:
3350  ImmIdx = 3;
3351  IsSigned = true;
3352  Scale = 4;
3353  break;
3354 
3355  case AArch64::LDURXi:
3356  case AArch64::LDURWi:
3357  case AArch64::LDURBi:
3358  case AArch64::LDURHi:
3359  case AArch64::LDURSi:
3360  case AArch64::LDURDi:
3361  case AArch64::LDURQi:
3362  case AArch64::LDURHHi:
3363  case AArch64::LDURBBi:
3364  case AArch64::LDURSBXi:
3365  case AArch64::LDURSBWi:
3366  case AArch64::LDURSHXi:
3367  case AArch64::LDURSHWi:
3368  case AArch64::LDURSWi:
3369  case AArch64::STURXi:
3370  case AArch64::STURWi:
3371  case AArch64::STURBi:
3372  case AArch64::STURHi:
3373  case AArch64::STURSi:
3374  case AArch64::STURDi:
3375  case AArch64::STURQi:
3376  case AArch64::STURBBi:
3377  case AArch64::STURHHi:
3378  Scale = 1;
3379  break;
3380  }
3381 
3382  Offset += MI.getOperand(ImmIdx).getImm() * Scale;
3383 
3384  bool useUnscaledOp = false;
3385  // If the offset doesn't match the scale, we rewrite the instruction to
3386  // use the unscaled instruction instead. Likewise, if we have a negative
3387  // offset (and have an unscaled op to use).
3388  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
3389  useUnscaledOp = true;
3390 
3391  // Use an unscaled addressing mode if the instruction has a negative offset
3392  // (or if the instruction is already using an unscaled addressing mode).
3393  unsigned MaskBits;
3394  if (IsSigned) {
3395  // ldp/stp instructions.
3396  MaskBits = 7;
3397  Offset /= Scale;
3398  } else if (UnscaledOp == 0 || useUnscaledOp) {
3399  MaskBits = 9;
3400  IsSigned = true;
3401  Scale = 1;
3402  } else {
3403  MaskBits = 12;
3404  IsSigned = false;
3405  Offset /= Scale;
3406  }
3407 
3408  // Attempt to fold address computation.
3409  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
3410  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
3411  if (Offset >= MinOff && Offset <= MaxOff) {
3412  if (EmittableOffset)
3413  *EmittableOffset = Offset;
3414  Offset = 0;
3415  } else {
3416  int NewOff = Offset < 0 ? MinOff : MaxOff;
3417  if (EmittableOffset)
3418  *EmittableOffset = NewOff;
3419  Offset = (Offset - NewOff) * Scale;
3420  }
3421  if (OutUseUnscaledOp)
3422  *OutUseUnscaledOp = useUnscaledOp;
3423  if (OutUnscaledOp)
3424  *OutUnscaledOp = UnscaledOp;
3425  return AArch64FrameOffsetCanUpdate |
3426  (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3427 }
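// Worked example (annotation): for AArch64::LDRXui with Offset == 40,
// Scale == 8 yields the encodable scaled immediate 5, so the offset is
// legal as-is; with Offset == -8 the unsigned scaled form cannot encode it,
// so the code above falls back to AArch64::LDURXi and its signed 9-bit
// range.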
3428 
3429 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3430  unsigned FrameReg, int &Offset,
3431  const AArch64InstrInfo *TII) {
3432  unsigned Opcode = MI.getOpcode();
3433  unsigned ImmIdx = FrameRegIdx + 1;
3434 
3435  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3436  Offset += MI.getOperand(ImmIdx).getImm();
3437  emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3438  MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3439  MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3440  MI.eraseFromParent();
3441  Offset = 0;
3442  return true;
3443  }
3444 
3445  int NewOffset;
3446  unsigned UnscaledOp;
3447  bool UseUnscaledOp;
3448  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3449  &UnscaledOp, &NewOffset);
3450  if (Status & AArch64FrameOffsetCanUpdate) {
3451  if (Status & AArch64FrameOffsetIsLegal)
3452  // Replace the FrameIndex with FrameReg.
3453  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3454  if (UseUnscaledOp)
3455  MI.setDesc(TII->get(UnscaledOp));
3456 
3457  MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3458  return Offset == 0;
3459  }
3460 
3461  return false;
3462 }
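// Usage note (annotation): ADD[S]Xri frame-index arithmetic is folded
// entirely through emitFrameOffset above, while for loads and stores any
// residue that the (possibly unscaled) opcode cannot encode is left in
// Offset for the caller to materialize; the return value reports whether
// the frame index was resolved completely.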
3463 
3464 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3465  NopInst.setOpcode(AArch64::HINT);
3466  NopInst.addOperand(MCOperand::createImm(0));
3467 }
3468 
3469 // AArch64 supports MachineCombiner.
3470 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3471 
3472 // True when Opc sets flags
3473 static bool isCombineInstrSettingFlag(unsigned Opc) {
3474  switch (Opc) {
3475  case AArch64::ADDSWrr:
3476  case AArch64::ADDSWri:
3477  case AArch64::ADDSXrr:
3478  case AArch64::ADDSXri:
3479  case AArch64::SUBSWrr:
3480  case AArch64::SUBSXrr:
3481  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3482  case AArch64::SUBSWri:
3483  case AArch64::SUBSXri:
3484  return true;
3485  default:
3486  break;
3487  }
3488  return false;
3489 }
3490 
3491 // 32b Opcodes that can be combined with a MUL
3492 static bool isCombineInstrCandidate32(unsigned Opc) {
3493  switch (Opc) {
3494  case AArch64::ADDWrr:
3495  case AArch64::ADDWri:
3496  case AArch64::SUBWrr:
3497  case AArch64::ADDSWrr:
3498  case AArch64::ADDSWri:
3499  case AArch64::SUBSWrr:
3500  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3501  case AArch64::SUBWri:
3502  case AArch64::SUBSWri:
3503  return true;
3504  default:
3505  break;
3506  }
3507  return false;
3508 }
3509 
3510 // 64b Opcodes that can be combined with a MUL
3511 static bool isCombineInstrCandidate64(unsigned Opc) {
3512  switch (Opc) {
3513  case AArch64::ADDXrr:
3514  case AArch64::ADDXri:
3515  case AArch64::SUBXrr:
3516  case AArch64::ADDSXrr:
3517  case AArch64::ADDSXri:
3518  case AArch64::SUBSXrr:
3519  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3520  case AArch64::SUBXri:
3521  case AArch64::SUBSXri:
3522  return true;
3523  default:
3524  break;
3525  }
3526  return false;
3527 }
3528 
3529 // FP Opcodes that can be combined with a FMUL
3530 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3531  switch (Inst.getOpcode()) {
3532  default:
3533  break;
3534  case AArch64::FADDSrr:
3535  case AArch64::FADDDrr:
3536  case AArch64::FADDv2f32:
3537  case AArch64::FADDv2f64:
3538  case AArch64::FADDv4f32:
3539  case AArch64::FSUBSrr:
3540  case AArch64::FSUBDrr:
3541  case AArch64::FSUBv2f32:
3542  case AArch64::FSUBv2f64:
3543  case AArch64::FSUBv4f32:
3544  TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3545  return (Options.UnsafeFPMath ||
3546  Options.AllowFPOpFusion == FPOpFusion::Fast);
3547  }
3548  return false;
3549 }
3550 
3551 // Opcodes that can be combined with a MUL
3552 static bool isCombineInstrCandidate(unsigned Opc) {
3553  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3554 }
3555 
3556 //
3557 // Utility routine that checks if \param MO is defined by an
3558 // \param CombineOpc instruction in the basic block \param MBB
3559 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3560  unsigned CombineOpc, unsigned ZeroReg = 0,
3561  bool CheckZeroReg = false) {
3562  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3563  MachineInstr *MI = nullptr;
3564 
3565  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3566  MI = MRI.getUniqueVRegDef(MO.getReg());
3567  // And it needs to be in the trace (otherwise, it won't have a depth).
3568  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3569  return false;
3570  // Must only be used by the user we combine with.
3571  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3572  return false;
3573 
3574  if (CheckZeroReg) {
3575  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3576  MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3577  MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3578  // The third input reg must be zero.
3579  if (MI->getOperand(3).getReg() != ZeroReg)
3580  return false;
3581  }
3582 
3583  return true;
3584 }
3585 
3586 //
3587 // Is \param MO defined by an integer multiply and can be combined?
3588 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3589  unsigned MulOpc, unsigned ZeroReg) {
3590  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3591 }
3592 
3593 //
3594 // Is \param MO defined by a floating-point multiply and can be combined?
3595 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3596  unsigned MulOpc) {
3597  return canCombine(MBB, MO, MulOpc);
3598 }
3599 
3600 // TODO: There are many more machine instruction opcodes to match:
3601 // 1. Other data types (integer, vectors)
3602 // 2. Other math / logic operations (xor, or)
3603 // 3. Other forms of the same operation (intrinsics and other variants)
3604 bool AArch64InstrInfo::isAssociativeAndCommutative(
3605  const MachineInstr &Inst) const {
3606  switch (Inst.getOpcode()) {
3607  case AArch64::FADDDrr:
3608  case AArch64::FADDSrr:
3609  case AArch64::FADDv2f32:
3610  case AArch64::FADDv2f64:
3611  case AArch64::FADDv4f32:
3612  case AArch64::FMULDrr:
3613  case AArch64::FMULSrr:
3614  case AArch64::FMULX32:
3615  case AArch64::FMULX64:
3616  case AArch64::FMULXv2f32:
3617  case AArch64::FMULXv2f64:
3618  case AArch64::FMULXv4f32:
3619  case AArch64::FMULv2f32:
3620  case AArch64::FMULv2f64:
3621  case AArch64::FMULv4f32:
3622  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3623  default:
3624  return false;
3625  }
3626 }
3627 
3628 /// Find instructions that can be turned into madd.
3629 static bool getMaddPatterns(MachineInstr &Root,
3630  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3631  unsigned Opc = Root.getOpcode();
3632  MachineBasicBlock &MBB = *Root.getParent();
3633  bool Found = false;
3634 
3635  if (!isCombineInstrCandidate(Opc))
3636  return false;
3637  if (isCombineInstrSettingFlag(Opc)) {
3638  int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3639  // When NZCV is live, bail out.
3640  if (Cmp_NZCV == -1)
3641  return false;
3642  unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3643  // When the opcode can't change, bail out.
3644  // CHECKME: do we miss any cases for opcode conversion?
3645  if (NewOpc == Opc)
3646  return false;
3647  Opc = NewOpc;
3648  }
3649 
3650  switch (Opc) {
3651  default:
3652  break;
3653  case AArch64::ADDWrr:
3654  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3655  "ADDWrr does not have register operands");
3656  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3657  AArch64::WZR)) {
3658  Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
3659  Found = true;
3660  }
3661  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3662  AArch64::WZR)) {
3663  Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
3664  Found = true;
3665  }
3666  break;
3667  case AArch64::ADDXrr:
3668  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3669  AArch64::XZR)) {
3670  Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
3671  Found = true;
3672  }
3673  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3674  AArch64::XZR)) {
3675  Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
3676  Found = true;
3677  }
3678  break;
3679  case AArch64::SUBWrr:
3680  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3681  AArch64::WZR)) {
3682  Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
3683  Found = true;
3684  }
3685  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3686  AArch64::WZR)) {
3687  Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
3688  Found = true;
3689  }
3690  break;
3691  case AArch64::SUBXrr:
3692  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3693  AArch64::XZR)) {
3694  Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
3695  Found = true;
3696  }
3697  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3698  AArch64::XZR)) {
3699  Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
3700  Found = true;
3701  }
3702  break;
3703  case AArch64::ADDWri:
3704  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3705  AArch64::WZR)) {
3706  Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
3707  Found = true;
3708  }
3709  break;
3710  case AArch64::ADDXri:
3711  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3712  AArch64::XZR)) {
3713  Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3714  Found = true;
3715  }
3716  break;
3717  case AArch64::SUBWri:
3718  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3719  AArch64::WZR)) {
3720  Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3721  Found = true;
3722  }
3723  break;
3724  case AArch64::SUBXri:
3725  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3726  AArch64::XZR)) {
3727  Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3728  Found = true;
3729  }
3730  break;
3731  }
3732  return Found;
3733 }
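// As an illustration, the MULADDW_OP1 pattern found above lets the combiner
// turn (registers shown post-allocation for readability; matching actually
// happens on virtual registers):
// \code
//   mul  w8, w0, w1
//   add  w0, w8, w2
// \endcode
// into the single instruction
// \code
//   madd w0, w0, w1, w2
// \endcode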
3734 /// Floating-Point Support
3735 
3736 /// Find instructions that can be turned into madd.
3737 static bool getFMAPatterns(MachineInstr &Root,
3739 
3740  if (!isCombineInstrCandidateFP(Root))
3741  return false;
3742 
3743  MachineBasicBlock &MBB = *Root.getParent();
3744  bool Found = false;
3745 
3746  switch (Root.getOpcode()) {
3747  default:
3748  assert(false && "Unsupported FP instruction in combiner\n");
3749  break;
3750  case AArch64::FADDSrr:
3751  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3752  "FADDSrr does not have register operands");
3753  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3754  Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3755  Found = true;
3756  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3757  AArch64::FMULv1i32_indexed)) {
3758  Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3759  Found = true;
3760  }
3761  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3762  Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3763  Found = true;
3764  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3765  AArch64::FMULv1i32_indexed)) {
3766  Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3767  Found = true;
3768  }
3769  break;
3770  case AArch64::FADDDrr:
3771  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3772  Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3773  Found = true;
3774  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3775  AArch64::FMULv1i64_indexed)) {
3776  Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3777  Found = true;
3778  }
3779  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3780  Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3781  Found = true;
3782  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3783  AArch64::FMULv1i64_indexed)) {
3784  Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3785  Found = true;
3786  }
3787  break;
3788  case AArch64::FADDv2f32:
3789  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3790  AArch64::FMULv2i32_indexed)) {
3791  Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3792  Found = true;
3793  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3794  AArch64::FMULv2f32)) {
3795  Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3796  Found = true;
3797  }
3798  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3799  AArch64::FMULv2i32_indexed)) {
3800  Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3801  Found = true;
3802  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3803  AArch64::FMULv2f32)) {
3804  Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3805  Found = true;
3806  }
3807  break;
3808  case AArch64::FADDv2f64:
3809  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3810  AArch64::FMULv2i64_indexed)) {
3811  Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3812  Found = true;
3813  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3814  AArch64::FMULv2f64)) {
3815  Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3816  Found = true;
3817  }
3818  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3819  AArch64::FMULv2i64_indexed)) {
3820  Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3821  Found = true;
3822  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3823  AArch64::FMULv2f64)) {
3824  Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3825  Found = true;
3826  }
3827  break;
3828  case AArch64::FADDv4f32:
3829  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3830  AArch64::FMULv4i32_indexed)) {
3831  Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3832  Found = true;
3833  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3834  AArch64::FMULv4f32)) {
3835  Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3836  Found = true;
3837  }
3838  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3839  AArch64::FMULv4i32_indexed)) {
3840  Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3841  Found = true;
3842  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3843  AArch64::FMULv4f32)) {
3844  Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3845  Found = true;
3846  }
3847  break;
3848 
3849  case AArch64::FSUBSrr:
3850  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3851  Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3852  Found = true;
3853  }
3854  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3855  Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3856  Found = true;
3857  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3858  AArch64::FMULv1i32_indexed)) {
3859  Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3860  Found = true;
3861  }
3862  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3863  Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
3864  Found = true;
3865  }
3866  break;
3867  case AArch64::FSUBDrr:
3868  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3869  Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3870  Found = true;
3871  }
3872  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3873  Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3874  Found = true;
3875  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3876  AArch64::FMULv1i64_indexed)) {
3877  Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3878  Found = true;
3879  }
3880  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3881  Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
3882  Found = true;
3883  }
3884  break;
3885  case AArch64::FSUBv2f32:
3886  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3887  AArch64::FMULv2i32_indexed)) {
3888  Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3889  Found = true;
3890  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3891  AArch64::FMULv2f32)) {
3892  Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3893  Found = true;
3894  }
3895  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3896  AArch64::FMULv2i32_indexed)) {
3897  Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
3898  Found = true;
3899  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3900  AArch64::FMULv2f32)) {
3901  Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
3902  Found = true;
3903  }
3904  break;
3905  case AArch64::FSUBv2f64:
3906  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3907  AArch64::FMULv2i64_indexed)) {
3908  Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3909  Found = true;
3910  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3911  AArch64::FMULv2f64)) {
3912  Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3913  Found = true;
3914  }
3915  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3916  AArch64::FMULv2i64_indexed)) {
3917  Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
3918  Found = true;
3919  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3920  AArch64::FMULv2f64)) {
3921  Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
3922  Found = true;
3923  }
3924  break;
3925  case AArch64::FSUBv4f32:
3926  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3927  AArch64::FMULv4i32_indexed)) {
3928  Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3929  Found = true;
3930  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3931  AArch64::FMULv4f32)) {
3932  Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3933  Found = true;
3934  }
3935  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3936  AArch64::FMULv4i32_indexed)) {
3937  Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
3938  Found = true;
3939  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3940  AArch64::FMULv4f32)) {
3941  Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
3942  Found = true;
3943  }
3944  break;
3945  }
3946  return Found;
3947 }
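// For example, with fast-math enabled, the FMULADDS_OP1 pattern allows
// \code
//   fmul s8, s0, s1
//   fadd s0, s8, s2
// \endcode
// to be fused into
// \code
//   fmadd s0, s0, s1, s2
// \endcode
// The fused form rounds only once, which changes results slightly -- hence
// the isCombineInstrCandidateFP gate above rather than fusing unconditionally.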
3948 
3949 /// Return true when a code sequence can improve throughput. It
3950 /// should be called only for instructions in loops.
3951 /// \param Pattern - combiner pattern
3952 bool AArch64InstrInfo::isThroughputPattern(
3953     MachineCombinerPattern Pattern) const {
3954  switch (Pattern) {
3955  default:
3956  break;
3957  case MachineCombinerPattern::FMULADDS_OP1:
3958  case MachineCombinerPattern::FMULADDS_OP2:
3959  case MachineCombinerPattern::FMULSUBS_OP1:
3960  case MachineCombinerPattern::FMULSUBS_OP2:
3961  case MachineCombinerPattern::FMULADDD_OP1:
3962  case MachineCombinerPattern::FMULADDD_OP2:
3963  case MachineCombinerPattern::FMULSUBD_OP1:
3964  case MachineCombinerPattern::FMULSUBD_OP2:
3965  case MachineCombinerPattern::FNMULSUBS_OP1:
3966  case MachineCombinerPattern::FNMULSUBD_OP1:
3967  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3968  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3969  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3970  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3971  case MachineCombinerPattern::FMLAv2f32_OP2:
3972  case MachineCombinerPattern::FMLAv2f32_OP1:
3973  case MachineCombinerPattern::FMLAv2f64_OP1:
3974  case MachineCombinerPattern::FMLAv2f64_OP2:
3975  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3976  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3977  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3978  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3979  case MachineCombinerPattern::FMLAv4f32_OP1:
3980  case MachineCombinerPattern::FMLAv4f32_OP2:
3981  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3982  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3983  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3984  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3985  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3986  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3987  case MachineCombinerPattern::FMLSv2f32_OP2:
3988  case MachineCombinerPattern::FMLSv2f64_OP2:
3989  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3990  case MachineCombinerPattern::FMLSv4f32_OP2:
3991  return true;
3992  } // end switch (Pattern)
3993  return false;
3994 }
3995 /// Return true when there is potentially a faster code sequence for an
3996 /// instruction chain ending in \p Root. All potential patterns are listed in
3997 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3998 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3999 
4000 bool AArch64InstrInfo::getMachineCombinerPatterns(
4001     MachineInstr &Root,
4002  SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
4003  // Integer patterns
4004  if (getMaddPatterns(Root, Patterns))
4005  return true;
4006  // Floating point patterns
4007  if (getFMAPatterns(Root, Patterns))
4008  return true;
4009 
4010  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
4011 }
4012 
4013 enum class FMAInstKind { Default, Indexed, Accumulator };
4014 /// genFusedMultiply - Generate fused multiply instructions.
4015 /// This function supports both integer and floating point instructions.
4016 /// A typical example:
4017 /// F|MUL I=A,B,0
4018 /// F|ADD R,I,C
4019 /// ==> F|MADD R,A,B,C
4020 /// \param MF Containing MachineFunction
4021 /// \param MRI Register information
4022 /// \param TII Target information
4023 /// \param Root is the F|ADD instruction
4024 /// \param [out] InsInstrs is a vector of machine instructions and will
4025 /// contain the generated madd instruction
4026 /// \param IdxMulOpd is index of operand in Root that is the result of
4027 /// the F|MUL. In the example above IdxMulOpd is 1.
4028 /// \param MaddOpc the opcode of the f|madd instruction
4029 /// \param RC Register class of operands
4030 /// \param kind of fma instruction (addressing mode) to be generated
4031 /// \param ReplacedAddend is the result register from the instruction
4032 /// replacing the non-combined operand, if any.
4033 static MachineInstr *
4034 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4035                  const TargetInstrInfo *TII, MachineInstr &Root,
4036  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4037  unsigned MaddOpc, const TargetRegisterClass *RC,
4038                  FMAInstKind kind = FMAInstKind::Default,
4039                  const unsigned *ReplacedAddend = nullptr) {
4040  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4041 
4042  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4043  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4044  unsigned ResultReg = Root.getOperand(0).getReg();
4045  unsigned SrcReg0 = MUL->getOperand(1).getReg();
4046  bool Src0IsKill = MUL->getOperand(1).isKill();
4047  unsigned SrcReg1 = MUL->getOperand(2).getReg();
4048  bool Src1IsKill = MUL->getOperand(2).isKill();
4049 
4050  unsigned SrcReg2;
4051  bool Src2IsKill;
4052  if (ReplacedAddend) {
4053  // If we just generated a new addend, we must be its only use.
4054  SrcReg2 = *ReplacedAddend;
4055  Src2IsKill = true;
4056  } else {
4057  SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4058  Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4059  }
4060 
4061  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4062  MRI.constrainRegClass(ResultReg, RC);
4063  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4064  MRI.constrainRegClass(SrcReg0, RC);
4065  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4066  MRI.constrainRegClass(SrcReg1, RC);
4067  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
4068  MRI.constrainRegClass(SrcReg2, RC);
4069 
4070  MachineInstrBuilder MIB;
4071  if (kind == FMAInstKind::Default)
4072  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4073  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4074  .addReg(SrcReg1, getKillRegState(Src1IsKill))
4075  .addReg(SrcReg2, getKillRegState(Src2IsKill));
4076  else if (kind == FMAInstKind::Indexed)
4077  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4078  .addReg(SrcReg2, getKillRegState(Src2IsKill))
4079  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4080  .addReg(SrcReg1, getKillRegState(Src1IsKill))
4081  .addImm(MUL->getOperand(3).getImm());
4082  else if (kind == FMAInstKind::Accumulator)
4083  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4084  .addReg(SrcReg2, getKillRegState(Src2IsKill))
4085  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4086  .addReg(SrcReg1, getKillRegState(Src1IsKill));
4087  else
4088  assert(false && "Invalid FMA instruction kind \n");
4089  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
4090  InsInstrs.push_back(MIB);
4091  return MUL;
4092 }
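// Note how the three kinds differ in operand order. A sketch of the three
// shapes emitted above (names follow the code: SrcReg0/SrcReg1 are the
// multiplicands, SrcReg2 the addend):
// \code
//   Default:     MADDWrrr          ResultReg, SrcReg0, SrcReg1, SrcReg2
//   Indexed:     FMLAv4i32_indexed ResultReg, SrcReg2, SrcReg0, SrcReg1, lane
//   Accumulator: FMLAv4f32         ResultReg, SrcReg2, SrcReg0, SrcReg1
// \endcode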
4093 
4094 /// genMaddR - Generate madd instruction and combine mul and add using
4095 /// an extra virtual register
4096 /// Example - an ADD intermediate needs to be stored in a register:
4097 /// MUL I=A,B,0
4098 /// ADD R,I,Imm
4099 /// ==> ORR V, ZR, Imm
4100 /// ==> MADD R,A,B,V
4101 /// \param MF Containing MachineFunction
4102 /// \param MRI Register information
4103 /// \param TII Target information
4104 /// \param Root is the ADD instruction
4105 /// \param [out] InsInstrs is a vector of machine instructions and will
4106 /// contain the generated madd instruction
4107 /// \param IdxMulOpd is index of operand in Root that is the result of
4108 /// the MUL. In the example above IdxMulOpd is 1.
4109 /// \param MaddOpc the opcode of the madd instruction
4110 /// \param VR is a virtual register that holds the value of an ADD operand
4111 /// (V in the example above).
4112 /// \param RC Register class of operands
4113 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4114                               const TargetInstrInfo *TII, MachineInstr &Root,
4115                               SmallVectorImpl<MachineInstr *> &InsInstrs,
4116  unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4117  const TargetRegisterClass *RC) {
4118  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4119 
4120  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4121  unsigned ResultReg = Root.getOperand(0).getReg();
4122  unsigned SrcReg0 = MUL->getOperand(1).getReg();
4123  bool Src0IsKill = MUL->getOperand(1).isKill();
4124  unsigned SrcReg1 = MUL->getOperand(2).getReg();
4125  bool Src1IsKill = MUL->getOperand(2).isKill();
4126 
4127  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4128  MRI.constrainRegClass(ResultReg, RC);
4129  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4130  MRI.constrainRegClass(SrcReg0, RC);
4131  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4132  MRI.constrainRegClass(SrcReg1, RC);
4133  if (TargetRegisterInfo::isVirtualRegister(VR))
4134  MRI.constrainRegClass(VR, RC);
4135 
4136  MachineInstrBuilder MIB =
4137  BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4138  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4139  .addReg(SrcReg1, getKillRegState(Src1IsKill))
4140  .addReg(VR);
4141  // Insert the MADD
4142  InsInstrs.push_back(MIB);
4143  return MUL;
4144 }
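// In assembly terms, since MADD has no immediate form, the transformation this
// helper supports looks like (immediate and registers illustrative):
// \code
//   mul  w8, w0, w1          ;     mov  w9, #255     ; ORR materializes Imm
//   add  w0, w8, #255    =>        madd w0, w0, w1, w9
// \endcode
// This is only done when the constant is encodable as a logical immediate,
// which the callers check via processLogicalImmediate.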
4145 
4146 /// When getMachineCombinerPatterns() finds potential patterns,
4147 /// this function generates the instructions that could replace the
4148 /// original code sequence
4149 void AArch64InstrInfo::genAlternativeCodeSequence(
4150     MachineInstr &Root, MachineCombinerPattern Pattern,
4151     SmallVectorImpl<MachineInstr *> &InsInstrs,
4152     SmallVectorImpl<MachineInstr *> &DelInstrs,
4153     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4154  MachineBasicBlock &MBB = *Root.getParent();
4155  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4156  MachineFunction &MF = *MBB.getParent();
4157  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4158 
4159  MachineInstr *MUL;
4160  const TargetRegisterClass *RC;
4161  unsigned Opc;
4162  switch (Pattern) {
4163  default:
4164  // Reassociate instructions.
4165  TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4166  DelInstrs, InstrIdxForVirtReg);
4167  return;
4168  case MachineCombinerPattern::MULADDW_OP1:
4169  case MachineCombinerPattern::MULADDX_OP1:
4170  // MUL I=A,B,0
4171  // ADD R,I,C
4172  // ==> MADD R,A,B,C
4173  // --- Create(MADD);
4174  if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4175  Opc = AArch64::MADDWrrr;
4176  RC = &AArch64::GPR32RegClass;
4177  } else {
4178  Opc = AArch64::MADDXrrr;
4179  RC = &AArch64::GPR64RegClass;
4180  }
4181  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4182  break;
4183  case MachineCombinerPattern::MULADDW_OP2:
4184  case MachineCombinerPattern::MULADDX_OP2:
4185  // MUL I=A,B,0
4186  // ADD R,C,I
4187  // ==> MADD R,A,B,C
4188  // --- Create(MADD);
4189  if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4190  Opc = AArch64::MADDWrrr;
4191  RC = &AArch64::GPR32RegClass;
4192  } else {
4193  Opc = AArch64::MADDXrrr;
4194  RC = &AArch64::GPR64RegClass;
4195  }
4196  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4197  break;
4198  case MachineCombinerPattern::MULADDWI_OP1:
4199  case MachineCombinerPattern::MULADDXI_OP1: {
4200  // MUL I=A,B,0
4201  // ADD R,I,Imm
4202  // ==> ORR V, ZR, Imm
4203  // ==> MADD R,A,B,V
4204  // --- Create(MADD);
4205  const TargetRegisterClass *OrrRC;
4206  unsigned BitSize, OrrOpc, ZeroReg;
4207  if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4208  OrrOpc = AArch64::ORRWri;
4209  OrrRC = &AArch64::GPR32spRegClass;
4210  BitSize = 32;
4211  ZeroReg = AArch64::WZR;
4212  Opc = AArch64::MADDWrrr;
4213  RC = &AArch64::GPR32RegClass;
4214  } else {
4215  OrrOpc = AArch64::ORRXri;
4216  OrrRC = &AArch64::GPR64spRegClass;
4217  BitSize = 64;
4218  ZeroReg = AArch64::XZR;
4219  Opc = AArch64::MADDXrrr;
4220  RC = &AArch64::GPR64RegClass;
4221  }
4222  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4223  uint64_t Imm = Root.getOperand(2).getImm();
4224 
4225  if (Root.getOperand(3).isImm()) {
4226  unsigned Val = Root.getOperand(3).getImm();
4227  Imm = Imm << Val;
4228  }
4229  uint64_t UImm = SignExtend64(Imm, BitSize);
4230  uint64_t Encoding;
4231  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4232  MachineInstrBuilder MIB1 =
4233  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4234  .addReg(ZeroReg)
4235  .addImm(Encoding);
4236  InsInstrs.push_back(MIB1);
4237  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4238  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4239  }
4240  break;
4241  }
4242  case MachineCombinerPattern::MULSUBW_OP1:
4243  case MachineCombinerPattern::MULSUBX_OP1: {
4244  // MUL I=A,B,0
4245  // SUB R,I, C
4246  // ==> SUB V, 0, C
4247  // ==> MADD R,A,B,V // = -C + A*B
4248  // --- Create(MADD);
4249  const TargetRegisterClass *SubRC;
4250  unsigned SubOpc, ZeroReg;
4251  if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4252  SubOpc = AArch64::SUBWrr;
4253  SubRC = &AArch64::GPR32spRegClass;
4254  ZeroReg = AArch64::WZR;
4255  Opc = AArch64::MADDWrrr;
4256  RC = &AArch64::GPR32RegClass;
4257  } else {
4258  SubOpc = AArch64::SUBXrr;
4259  SubRC = &AArch64::GPR64spRegClass;
4260  ZeroReg = AArch64::XZR;
4261  Opc = AArch64::MADDXrrr;
4262  RC = &AArch64::GPR64RegClass;
4263  }
4264  unsigned NewVR = MRI.createVirtualRegister(SubRC);
4265  // SUB NewVR, 0, C
4266  MachineInstrBuilder MIB1 =
4267  BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4268  .addReg(ZeroReg)
4269  .add(Root.getOperand(2));
4270  InsInstrs.push_back(MIB1);
4271  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4272  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4273  break;
4274  }
4275  case MachineCombinerPattern::MULSUBW_OP2:
4276  case MachineCombinerPattern::MULSUBX_OP2:
4277  // MUL I=A,B,0
4278  // SUB R,C,I
4279  // ==> MSUB R,A,B,C (computes C - A*B)
4280  // --- Create(MSUB);
4281  if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4282  Opc = AArch64::MSUBWrrr;
4283  RC = &AArch64::GPR32RegClass;
4284  } else {
4285  Opc = AArch64::MSUBXrrr;
4286  RC = &AArch64::GPR64RegClass;
4287  }
4288  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4289  break;
4290  case MachineCombinerPattern::MULSUBWI_OP1:
4291  case MachineCombinerPattern::MULSUBXI_OP1: {
4292  // MUL I=A,B,0
4293  // SUB R,I, Imm
4294  // ==> ORR V, ZR, -Imm
4295  // ==> MADD R,A,B,V // = -Imm + A*B
4296  // --- Create(MADD);
4297  const TargetRegisterClass *OrrRC;
4298  unsigned BitSize, OrrOpc, ZeroReg;
4299  if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4300  OrrOpc = AArch64::ORRWri;
4301  OrrRC = &AArch64::GPR32spRegClass;
4302  BitSize = 32;
4303  ZeroReg = AArch64::WZR;
4304  Opc = AArch64::MADDWrrr;
4305  RC = &AArch64::GPR32RegClass;
4306  } else {
4307  OrrOpc = AArch64::ORRXri;
4308  OrrRC = &AArch64::GPR64spRegClass;
4309  BitSize = 64;
4310  ZeroReg = AArch64::XZR;
4311  Opc = AArch64::MADDXrrr;
4312  RC = &AArch64::GPR64RegClass;
4313  }
4314  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4315  uint64_t Imm = Root.getOperand(2).getImm();
4316  if (Root.getOperand(3).isImm()) {
4317  unsigned Val = Root.getOperand(3).getImm();
4318  Imm = Imm << Val;
4319  }
4320  uint64_t UImm = SignExtend64(-Imm, BitSize);
4321  uint64_t Encoding;
4322  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4323  MachineInstrBuilder MIB1 =
4324  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4325  .addReg(ZeroReg)
4326  .addImm(Encoding);
4327  InsInstrs.push_back(MIB1);
4328  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4329  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4330  }
4331  break;
4332  }
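  // A worked example of the MULSUBWI_OP1 case above: for `x * y - 16` the
  // immediate is negated and materialized first (registers illustrative):
  // \code
  //   mul w8, w0, w1      =>   mov  w9, #-16
  //   sub w0, w8, #16          madd w0, w0, w1, w9
  // \endcode
  // -16 is 0xFFFFFFF0, a run of 28 set bits, so it is encodable as a 32-bit
  // logical immediate for the ORR.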
4333  // Floating Point Support
4334  case MachineCombinerPattern::FMULADDS_OP1:
4335  case MachineCombinerPattern::FMULADDD_OP1:
4336  // FMUL I=A,B,0
4337  // FADD R,I,C
4338  // ==> FMADD R,A,B,C
4339  // --- Create(FMADD);
4340  if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4341  Opc = AArch64::FMADDSrrr;
4342  RC = &AArch64::FPR32RegClass;
4343  } else {
4344  Opc = AArch64::FMADDDrrr;
4345  RC = &AArch64::FPR64RegClass;
4346  }
4347  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4348  break;
4349  case MachineCombinerPattern::FMULADDS_OP2:
4350  case MachineCombinerPattern::FMULADDD_OP2:
4351  // FMUL I=A,B,0
4352  // FADD R,C,I
4353  // ==> FMADD R,A,B,C
4354  // --- Create(FMADD);
4355  if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4356  Opc = AArch64::FMADDSrrr;
4357  RC = &AArch64::FPR32RegClass;
4358  } else {
4359  Opc = AArch64::FMADDDrrr;
4360  RC = &AArch64::FPR64RegClass;
4361  }
4362  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4363  break;
4364 
4365  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4366  Opc = AArch64::FMLAv1i32_indexed;
4367  RC = &AArch64::FPR32RegClass;
4368  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4369  FMAInstKind::Indexed);
4370  break;
4371  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4372  Opc = AArch64::FMLAv1i32_indexed;
4373  RC = &AArch64::FPR32RegClass;
4374  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4375  FMAInstKind::Indexed);
4376  break;
4377 
4378  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4379  Opc = AArch64::FMLAv1i64_indexed;
4380  RC = &AArch64::FPR64RegClass;
4381  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4382  FMAInstKind::Indexed);
4383  break;
4384  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4385  Opc = AArch64::FMLAv1i64_indexed;
4386  RC = &AArch64::FPR64RegClass;
4387  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4388  FMAInstKind::Indexed);
4389  break;
4390 
4391  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4392  case MachineCombinerPattern::FMLAv2f32_OP1:
4393  RC = &AArch64::FPR64RegClass;
4394  if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4395  Opc = AArch64::FMLAv2i32_indexed;
4396  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4397  FMAInstKind::Indexed);
4398  } else {
4399  Opc = AArch64::FMLAv2f32;
4400  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4401  FMAInstKind::Accumulator);
4402  }
4403  break;
4404  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4405  case MachineCombinerPattern::FMLAv2f32_OP2:
4406  RC = &AArch64::FPR64RegClass;
4407  if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4408  Opc = AArch64::FMLAv2i32_indexed;
4409  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4410  FMAInstKind::Indexed);
4411  } else {
4412  Opc = AArch64::FMLAv2f32;
4413  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4414  FMAInstKind::Accumulator);
4415  }
4416  break;
4417 
4418  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4419  case MachineCombinerPattern::FMLAv2f64_OP1:
4420  RC = &AArch64::FPR128RegClass;
4421  if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4422  Opc = AArch64::FMLAv2i64_indexed;
4423  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4424  FMAInstKind::Indexed);
4425  } else {
4426  Opc = AArch64::FMLAv2f64;
4427  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4428  FMAInstKind::Accumulator);
4429  }
4430  break;
4431  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4432  case MachineCombinerPattern::FMLAv2f64_OP2:
4433  RC = &AArch64::FPR128RegClass;
4434  if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4435  Opc = AArch64::FMLAv2i64_indexed;
4436  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4437  FMAInstKind::Indexed);
4438  } else {
4439  Opc = AArch64::FMLAv2f64;
4440  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4441  FMAInstKind::Accumulator);
4442  }
4443  break;
4444 
4445  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4446  case MachineCombinerPattern::FMLAv4f32_OP1:
4447  RC = &AArch64::FPR128RegClass;
4448  if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4449  Opc = AArch64::FMLAv4i32_indexed;
4450  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4451  FMAInstKind::Indexed);
4452  } else {
4453  Opc = AArch64::FMLAv4f32;
4454  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4455  FMAInstKind::Accumulator);
4456  }
4457  break;
4458 
4459  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4460  case MachineCombinerPattern::FMLAv4f32_OP2:
4461  RC = &AArch64::FPR128RegClass;
4462  if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4463  Opc = AArch64::FMLAv4i32_indexed;
4464  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4465  FMAInstKind::Indexed);
4466  } else {
4467  Opc = AArch64::FMLAv4f32;
4468  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4469  FMAInstKind::Accumulator);
4470  }
4471  break;
4472 
4473  case MachineCombinerPattern::FMULSUBS_OP1:
4474  case MachineCombinerPattern::FMULSUBD_OP1: {
4475  // FMUL I=A,B,0
4476  // FSUB R,I,C
4477  // ==> FNMSUB R,A,B,C // = -C + A*B
4478  // --- Create(FNMSUB);
4479  if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4480  Opc = AArch64::FNMSUBSrrr;
4481  RC = &AArch64::FPR32RegClass;
4482  } else {
4483  Opc = AArch64::FNMSUBDrrr;
4484  RC = &AArch64::FPR64RegClass;
4485  }
4486  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4487  break;
4488  }
4489 
4490  case MachineCombinerPattern::FNMULSUBS_OP1:
4491  case MachineCombinerPattern::FNMULSUBD_OP1: {
4492  // FNMUL I=A,B,0
4493  // FSUB R,I,C
4494  // ==> FNMADD R,A,B,C // = -A*B - C
4495  // --- Create(FNMADD);
4496  if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4497  Opc = AArch64::FNMADDSrrr;
4498  RC = &AArch64::FPR32RegClass;
4499  } else {
4500  Opc = AArch64::FNMADDDrrr;
4501  RC = &AArch64::FPR64RegClass;
4502  }
4503  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4504  break;
4505  }
4506 
4507  case MachineCombinerPattern::FMULSUBS_OP2:
4508  case MachineCombinerPattern::FMULSUBD_OP2: {
4509  // FMUL I=A,B,0
4510  // FSUB R,C,I
4511  // ==> FMSUB R,A,B,C (computes C - A*B)
4512  // --- Create(FMSUB);
4513  if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4514  Opc = AArch64::FMSUBSrrr;
4515  RC = &AArch64::FPR32RegClass;
4516  } else {
4517  Opc = AArch64::FMSUBDrrr;
4518  RC = &AArch64::FPR64RegClass;
4519  }
4520  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4521  break;
4522  }
4523 
4524  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4525  Opc = AArch64::FMLSv1i32_indexed;
4526  RC = &AArch64::FPR32RegClass;
4527  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4528  FMAInstKind::Indexed);
4529  break;
4530 
4531  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4532  Opc = AArch64::FMLSv1i64_indexed;
4533  RC = &AArch64::FPR64RegClass;
4534  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4535  FMAInstKind::Indexed);
4536  break;
4537 
4538  case MachineCombinerPattern::FMLSv2f32_OP2:
4539  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4540  RC = &AArch64::FPR64RegClass;
4541  if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4542  Opc = AArch64::FMLSv2i32_indexed;
4543  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4544  FMAInstKind::Indexed);
4545  } else {
4546  Opc = AArch64::FMLSv2f32;
4547  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4548  FMAInstKind::Accumulator);
4549  }
4550  break;
4551 
4552  case MachineCombinerPattern::FMLSv2f64_OP2:
4553  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4554  RC = &AArch64::FPR128RegClass;
4555  if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4556  Opc = AArch64::FMLSv2i64_indexed;
4557  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4558  FMAInstKind::Indexed);
4559  } else {
4560  Opc = AArch64::FMLSv2f64;
4561  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4562  FMAInstKind::Accumulator);
4563  }
4564  break;
4565 
4566  case MachineCombinerPattern::FMLSv4f32_OP2:
4567  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4568  RC = &AArch64::FPR128RegClass;
4569  if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4570  Opc = AArch64::FMLSv4i32_indexed;
4571  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4572  FMAInstKind::Indexed);
4573  } else {
4574  Opc = AArch64::FMLSv4f32;
4575  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4576  FMAInstKind::Accumulator);
4577  }
4578  break;
4579  case MachineCombinerPattern::FMLSv2f32_OP1:
4580  case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
4581  RC = &AArch64::FPR64RegClass;
4582  unsigned NewVR = MRI.createVirtualRegister(RC);
4583  MachineInstrBuilder MIB1 =
4584  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4585  .add(Root.getOperand(2));
4586  InsInstrs.push_back(MIB1);
4587  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4588  if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
4589  Opc = AArch64::FMLAv2i32_indexed;
4590  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4591  FMAInstKind::Indexed, &NewVR);
4592  } else {
4593  Opc = AArch64::FMLAv2f32;
4594  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4595  FMAInstKind::Accumulator, &NewVR);
4596  }
4597  break;
4598  }
4599  case MachineCombinerPattern::FMLSv4f32_OP1:
4600  case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
4601  RC = &AArch64::FPR128RegClass;
4602  unsigned NewVR = MRI.createVirtualRegister(RC);
4603  MachineInstrBuilder MIB1 =
4604  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4605  .add(Root.getOperand(2));
4606  InsInstrs.push_back(MIB1);
4607  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4608  if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
4609  Opc = AArch64::FMLAv4i32_indexed;
4610  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4611  FMAInstKind::Indexed, &NewVR);
4612  } else {
4613  Opc = AArch64::FMLAv4f32;
4614  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4615  FMAInstKind::Accumulator, &NewVR);
4616  }
4617  break;
4618  }
4619  case MachineCombinerPattern::FMLSv2f64_OP1:
4620  case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
4621  RC = &AArch64::FPR128RegClass;
4622  unsigned NewVR = MRI.createVirtualRegister(RC);
4623  MachineInstrBuilder MIB1 =
4624  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4625  .add(Root.getOperand(2));
4626  InsInstrs.push_back(MIB1);
4627  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4628  if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
4629  Opc = AArch64::FMLAv2i64_indexed;
4630  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4631  FMAInstKind::Indexed, &NewVR);
4632  } else {
4633  Opc = AArch64::FMLAv2f64;
4634  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4635  FMAInstKind::Accumulator, &NewVR);
4636  }
4637  break;
4638  }
4639  } // end switch (Pattern)
4640  // Record MUL and ADD/SUB for deletion
4641  DelInstrs.push_back(MUL);
4642  DelInstrs.push_back(&Root);
4643 }
4644 
4645 /// Replace csincr-branch sequence by simple conditional branch
4646 ///
4647 /// Examples:
4648 /// 1. \code
4649 /// csinc w9, wzr, wzr, <condition code>
4650 /// tbnz w9, #0, 0x44
4651 /// \endcode
4652 /// to
4653 /// \code
4654 /// b.<inverted condition code>
4655 /// \endcode
4656 ///
4657 /// 2. \code
4658 /// csinc w9, wzr, wzr, <condition code>
4659 /// tbz w9, #0, 0x44
4660 /// \endcode
4661 /// to
4662 /// \code
4663 /// b.<condition code>
4664 /// \endcode
4665 ///
4666 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4667 /// compare's constant operand is power of 2.
4668 ///
4669 /// Examples:
4670 /// \code
4671 /// and w8, w8, #0x400
4672 /// cbnz w8, L1
4673 /// \endcode
4674 /// to
4675 /// \code
4676 /// tbnz w8, #10, L1
4677 /// \endcode
4678 ///
4679 /// \param MI Conditional Branch
4680 /// \return True when the simple conditional branch is generated
4681 ///
4682 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4683  bool IsNegativeBranch = false;
4684  bool IsTestAndBranch = false;
4685  unsigned TargetBBInMI = 0;
4686  switch (MI.getOpcode()) {
4687  default:
4688  llvm_unreachable("Unknown branch instruction?");
4689  case AArch64::Bcc:
4690  return false;
4691  case AArch64::CBZW:
4692  case AArch64::CBZX:
4693  TargetBBInMI = 1;
4694  break;
4695  case AArch64::CBNZW:
4696  case AArch64::CBNZX:
4697  TargetBBInMI = 1;
4698  IsNegativeBranch = true;
4699  break;
4700  case AArch64::TBZW:
4701  case AArch64::TBZX:
4702  TargetBBInMI = 2;
4703  IsTestAndBranch = true;
4704  break;
4705  case AArch64::TBNZW:
4706  case AArch64::TBNZX:
4707  TargetBBInMI = 2;
4708  IsNegativeBranch = true;
4709  IsTestAndBranch = true;
4710  break;
4711  }
4712  // So we increment a zero register and test for bits other
4713  // than bit 0? Conservatively bail out in case the verifier
4714  // missed this case.
4715  if (IsTestAndBranch && MI.getOperand(1).getImm())
4716  return false;
4717 
4718  // Find Definition.
4719  assert(MI.getParent() && "Incomplete machine instruction\n");
4720  MachineBasicBlock *MBB = MI.getParent();
4721  MachineFunction *MF = MBB->getParent();
4722  MachineRegisterInfo *MRI = &MF->getRegInfo();
4723  unsigned VReg = MI.getOperand(0).getReg();
4724  if (!TargetRegisterInfo::isVirtualRegister(VReg))
4725  return false;
4726 
4727  MachineInstr *DefMI = MRI->getVRegDef(VReg);
4728 
4729  // Look through COPY instructions to find definition.
4730  while (DefMI->isCopy()) {
4731  unsigned CopyVReg = DefMI->getOperand(1).getReg();
4732  if (!MRI->hasOneNonDBGUse(CopyVReg))
4733  return false;
4734  if (!MRI->hasOneDef(CopyVReg))
4735  return false;
4736  DefMI = MRI->getVRegDef(CopyVReg);
4737  }
4738 
4739  switch (DefMI->getOpcode()) {
4740  default:
4741  return false;
4742  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4743  case AArch64::ANDWri:
4744  case AArch64::ANDXri: {
4745  if (IsTestAndBranch)
4746  return false;
4747  if (DefMI->getParent() != MBB)
4748  return false;
4749  if (!MRI->hasOneNonDBGUse(VReg))
4750  return false;
4751 
4752  bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4753  uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4754  DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4755  if (!isPowerOf2_64(Mask))
4756  return false;
4757 
4758  MachineOperand &MO = DefMI->getOperand(1);
4759  unsigned NewReg = MO.getReg();
4760  if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4761  return false;
4762 
4763  assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4764 
4765  MachineBasicBlock &RefToMBB = *MBB;
4766  MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4767  DebugLoc DL = MI.getDebugLoc();
4768  unsigned Imm = Log2_64(Mask);
4769  unsigned Opc = (Imm < 32)
4770  ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4771  : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4772  MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4773  .addReg(NewReg)
4774  .addImm(Imm)
4775  .addMBB(TBB);
4776  // Register lives on to the TB(N)Z now.
4777  MO.setIsKill(false);
4778 
4779  // For immediates smaller than 32, we need to use the 32-bit
4780  // variant (W) in all cases. Indeed, the 64-bit variant does not
4781  // allow encoding them.
4782  // Therefore, if the input register is 64-bit, we need to take the
4783  // 32-bit sub-part.
4784  if (!Is32Bit && Imm < 32)
4785  NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4786  MI.eraseFromParent();
4787  return true;
4788  }
4789  // Look for CSINC
4790  case AArch64::CSINCWr:
4791  case AArch64::CSINCXr: {
4792  if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4793  DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4794  !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4795  DefMI->getOperand(2).getReg() == AArch64::XZR))
4796  return false;
4797 
4798  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4799  return false;
4800 
4801  AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4802  // Convert only when the condition code is not modified between
4803  // the CSINC and the branch. The CC may be used by other
4804  // instructions in between.
4805  if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4806  return false;
4807  MachineBasicBlock &RefToMBB = *MBB;
4808  MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4809  DebugLoc DL = MI.getDebugLoc();
4810  if (IsNegativeBranch)
4811  CC = AArch64CC::getInvertedCondCode(CC);
4812  BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4813  MI.eraseFromParent();
4814  return true;
4815  }
4816  }
4817 }
4818 
4819 std::pair<unsigned, unsigned>
4820 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4821  const unsigned Mask = AArch64II::MO_FRAGMENT;
4822  return std::make_pair(TF & Mask, TF & ~Mask);
4823 }
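// For instance (a sketch):
// \code
//   unsigned TF = AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
//   auto Parts = TII->decomposeMachineOperandsTargetFlags(TF);
//   // Parts.first  == AArch64II::MO_PAGEOFF  (the direct fragment)
//   // Parts.second == AArch64II::MO_NC       (the bitmask remainder)
// \endcode
// This mirrors how :lo12: no-overflow operands combine both kinds of flag.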
4824 
4825 ArrayRef<std::pair<unsigned, const char *>>
4826 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4827  using namespace AArch64II;
4828 
4829  static const std::pair<unsigned, const char *> TargetFlags[] = {
4830  {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4831  {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4832  {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
4833  {MO_HI12, "aarch64-hi12"}};
4834  return makeArrayRef(TargetFlags);
4835 }
4836 
4837 ArrayRef<std::pair<unsigned, const char *>>
4838 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4839  using namespace AArch64II;
4840 
4841  static const std::pair<unsigned, const char *> TargetFlags[] = {
4842  {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
4843  return makeArrayRef(TargetFlags);
4844 }
4845 
4846 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4847 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4848  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4849  {{MOSuppressPair, "aarch64-suppress-pair"},
4850  {MOStridedAccess, "aarch64-strided-access"}};
4851  return makeArrayRef(TargetFlags);
4852 }
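// These names are what MIR uses for the target-specific memory-operand flags;
// e.g. a load whose pairing was suppressed might serialize roughly as
// (a sketch, exact MIR syntax may differ by version):
// \code
//   %1:gpr64 = LDRXui %0, 0 :: ("aarch64-suppress-pair" load 8)
// \endcode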
4853 
4854  /// Constants defining how certain sequences should be outlined.
4855  /// This encompasses how an outlined function should be called, and what kind of
4856  /// frame should be emitted for that outlined function.
4857  ///
4858  /// \p MachineOutlinerDefault implies that the function should be called with
4859  /// a save and restore of LR to the stack.
4860  ///
4861  /// That is,
4862  ///
4863  /// I1 Save LR OUTLINED_FUNCTION:
4864  /// I2 --> BL OUTLINED_FUNCTION I1
4865  /// I3 Restore LR I2
4866  /// I3
4867  /// RET
4868  ///
4869  /// * Call construction overhead: 3 (save + BL + restore)
4870  /// * Frame construction overhead: 1 (ret)
4871  /// * Requires stack fixups? Yes
4872  ///
4873  /// \p MachineOutlinerTailCall implies that the function is being created from
4874  /// a sequence of instructions ending in a return.
4875  ///
4876  /// That is,
4877  ///
4878  /// I1 OUTLINED_FUNCTION:
4879  /// I2 --> B OUTLINED_FUNCTION I1
4880  /// RET I2
4881  /// RET
4882  ///
4883  /// * Call construction overhead: 1 (B)
4884  /// * Frame construction overhead: 0 (Return included in sequence)
4885  /// * Requires stack fixups? No
4886  ///
4887  /// \p MachineOutlinerNoLRSave implies that the function should be called using
4888  /// a BL instruction, but doesn't require LR to be saved and restored. This
4889  /// happens when LR is known to be dead.
4890  ///
4891  /// That is,
4892  ///
4893  /// I1 OUTLINED_FUNCTION:
4894  /// I2 --> BL OUTLINED_FUNCTION I1
4895  /// I3 I2
4896  /// I3
4897  /// RET
4898  ///
4899  /// * Call construction overhead: 1 (BL)
4900  /// * Frame construction overhead: 1 (RET)
4901  /// * Requires stack fixups? No
4902  ///
4903  /// \p MachineOutlinerThunk implies that the function is being created from
4904  /// a sequence of instructions ending in a call. The outlined function is
4905  /// called with a BL instruction, and the outlined function tail-calls the
4906  /// original call destination.
4907  ///
4908  /// That is,
4909  ///
4910  /// I1 OUTLINED_FUNCTION:
4911  /// I2 --> BL OUTLINED_FUNCTION I1
4912  /// BL f I2
4913  /// B f
4914  /// * Call construction overhead: 1 (BL)
4915  /// * Frame construction overhead: 0
4916  /// * Requires stack fixups? No
4917  ///
4918 enum MachineOutlinerClass {
4919  MachineOutlinerDefault, /// Emit a save, restore, call, and return.
4920  MachineOutlinerTailCall, /// Only emit a branch.
4921  MachineOutlinerNoLRSave, /// Emit a call and return.
4922  MachineOutlinerThunk, /// Emit a call and tail-call.
4923 };
4924 
4925 enum MachineOutlinerMBBFlags {
4926  LRUnavailableSomewhere = 0x2,
4927  HasCalls = 0x4
4928 };
4929 
4930 outliner::TargetCostInfo
4931 AArch64InstrInfo::getOutliningCandidateInfo(
4932  std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
4933  unsigned SequenceSize = std::accumulate(
4934  RepeatedSequenceLocs[0].front(),
4935  std::next(RepeatedSequenceLocs[0].back()),
4936  0, [this](unsigned Sum, const MachineInstr &MI) {
4937  return Sum + getInstSizeInBytes(MI);
4938  });
4939  unsigned CallID = MachineOutlinerDefault;
4940  unsigned FrameID = MachineOutlinerDefault;
4941  unsigned NumBytesForCall = 12;
4942  unsigned NumBytesToCreateFrame = 4;
4943 
4944  // Compute liveness information for each candidate.
4945  const TargetRegisterInfo &TRI = getRegisterInfo();
4946  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4947  [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });
4948 
4949  // According to the AArch64 Procedure Call Standard, the following are
4950  // undefined on entry/exit from a function call:
4951  //
4952  // * Registers x16, x17, (and thus w16, w17)
4953  // * Condition codes (and thus the NZCV register)
4954  //
4955  // Because of this, we can't outline any sequence of instructions where
4956  // one of these registers is live into/across it. Thus, we need to
4957  // delete those candidates.
4960  auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
4961  LiveRegUnits LRU = C.LRU;
4962  return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
4963  !LRU.available(AArch64::NZCV));
4964  };
4965 
4966  // Erase every candidate that violates the restrictions above. (It could be
4967  // true that we have viable candidates, so it's not worth bailing out in
4968  // the case that, say, 1 out of 20 candidates violate the restrictions.)
4969  RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
4970  RepeatedSequenceLocs.end(),
4971  CantGuaranteeValueAcrossCall),
4972  RepeatedSequenceLocs.end());
4973 
4974  // At this point, we have only "safe" candidates to outline. Figure out
4975  // frame + call instruction information.
4976 
4977  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
4978 
4979  // If the last instruction in any candidate is a terminator, then we should
4980  // tail call all of the candidates.
4981  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
4982  CallID = MachineOutlinerTailCall;
4983  FrameID = MachineOutlinerTailCall;
4984  NumBytesForCall = 4;
4985  NumBytesToCreateFrame = 0;
4986  }
4987 
4988  else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
4989  // FIXME: Do we need to check if the code after this uses the value of LR?
4990  CallID = MachineOutlinerThunk;
4991  FrameID = MachineOutlinerThunk;
4992  NumBytesForCall = 4;
4993  NumBytesToCreateFrame = 0;
4994  }
4995 
4996  // Make sure that LR isn't live on entry to this candidate. The only
4997  // instructions that use LR that could possibly appear in a repeated sequence
4998  // are calls. Therefore, we only have to check and see if LR is dead on entry
4999  // to (or exit from) some candidate.
5000  else if (std::all_of(RepeatedSequenceLocs.begin(),
5001  RepeatedSequenceLocs.end(),
5002  [](outliner::Candidate &C) {
5003  return C.LRU.available(AArch64::LR);
5004  })) {
5005  CallID = MachineOutlinerNoLRSave;
5006  FrameID = MachineOutlinerNoLRSave;
5007  NumBytesForCall = 4;
5008  NumBytesToCreateFrame = 4;
5009  }
5010 
5011  // Check if the range contains a call. These require a save + restore of the
5012  // link register.
5013  if (std::any_of(RepeatedSequenceLocs[0].front(),
5014  RepeatedSequenceLocs[0].back(),
5015  [](const MachineInstr &MI) { return MI.isCall(); }))
5016  NumBytesToCreateFrame += 8; // Save + restore the link register.
5017 
5018  // Handle the last instruction separately. If this is a tail call, then the
5019  // last instruction is a call. We don't want to save + restore in this case.
5020  // However, it could be possible that the last instruction is a call without
5021  // it being valid to tail call this sequence. We should consider this as well.
5022  else if (FrameID != MachineOutlinerThunk &&
5023  FrameID != MachineOutlinerTailCall &&
5024  RepeatedSequenceLocs[0].back()->isCall())
5025  NumBytesToCreateFrame += 8;
5026 
5027  return outliner::TargetCostInfo(SequenceSize, NumBytesForCall,
5028  NumBytesToCreateFrame, CallID, FrameID);
5029 }
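// A rough profitability illustration using the numbers above (the actual
// decision is made by the generic MachineOutliner, not here): for a 12-byte
// sequence repeated 10 times, outlining with the default frame costs
// 10 * 12 (calls) + 12 (body) + 4 (frame) = 136 bytes versus 120 bytes
// inline, so it would be rejected; outlined as a tail call it costs
// 10 * 4 + 12 + 0 = 52 bytes and is clearly profitable.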
5030 
5031 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5032  MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5033  const Function &F = MF.getFunction();
5034 
5035  // Can F be deduplicated by the linker? If it can, don't outline from it.
5036  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5037  return false;
5038 
5039  // Don't outline from functions with section markings; the program could
5040  // expect that all the code is in the named section.
5041  // FIXME: Allow outlining from multiple functions with the same section
5042  // marking.
5043  if (F.hasSection())
5044  return false;
5045 
5046  // Outlining from functions with redzones is unsafe since the outliner may
5047  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5048  // outline from it.
5049  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5050  if (!AFI || AFI->hasRedZone().getValueOr(true))
5051  return false;
5052 
5053  // It's safe to outline from MF.
5054  return true;
5055 }
5056 
5057 unsigned
5058 AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
5059  unsigned Flags = 0x0;
5060  // Check if there's a call inside this MachineBasicBlock. If there is, then
5061  // set a flag.
5062  if (std::any_of(MBB.begin(), MBB.end(),
5063  [](MachineInstr &MI) { return MI.isCall(); }))
5064  Flags |= MachineOutlinerMBBFlags::HasCalls;
5065 
5066  // Check if LR is available through all of the MBB. If it's not, then set
5067  // a flag.
5068  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
5069  "Suitable Machine Function for outlining must track liveness");
5070  LiveRegUnits LRU(getRegisterInfo());
5071  LRU.addLiveOuts(MBB);
5072 
5073  std::for_each(MBB.rbegin(),
5074  MBB.rend(),
5075  [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
5076 
5077  if (!LRU.available(AArch64::LR))
5078  Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
5079 
5080  return Flags;
5081 }
5082 
5083 outliner::InstrType
5084 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
5085  unsigned Flags) const {
5086  MachineInstr &MI = *MIT;
5087  MachineBasicBlock *MBB = MI.getParent();
5088  MachineFunction *MF = MBB->getParent();
5089  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
5090 
5091  // Don't outline LOHs.
5092  if (FuncInfo->getLOHRelated().count(&MI))
5093  return outliner::InstrType::Illegal;
5094 
5095  // Don't allow debug values to impact outlining type.
5096  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
5097  return outliner::InstrType::Invisible;
5098 
5099  // At this point, KILL instructions don't really tell us much so we can go
5100  // ahead and skip over them.
5101  if (MI.isKill())
5102  return outliner::InstrType::Invisible;
5103 
5104  // Is this a terminator for a basic block?
5105  if (MI.isTerminator()) {
5106 
5107  // Is this the end of a function?
5108  if (MI.getParent()->succ_empty())
5109  return outliner::InstrType::Legal;
5110 
5111  // It's not, so don't outline it.
5112  return outliner::InstrType::Illegal;
5113  }
5114 
5115  // Make sure none of the operands are un-outlinable.
5116  for (const MachineOperand &MOP : MI.operands()) {
5117  if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
5118  MOP.isTargetIndex())
5119  return outliner::InstrType::Illegal;
5120 
5121  // If it uses LR or W30 explicitly, then don't touch it.
5122  if (MOP.isReg() && !MOP.isImplicit() &&
5123  (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
5124  return outliner::InstrType::Illegal;
5125  }
5126 
5127  // Special cases for instructions that can always be outlined, but will fail
5128  // the later tests. e.g., ADRPs, which are PC-relative and use LR, but can
5129  // always be outlined because they don't require a *specific* value in LR.
5130  if (MI.getOpcode() == AArch64::ADRP)
5131  return outliner::InstrType::Legal;
5132 
5133  // If MI is a call we might be able to outline it. We don't want to outline
5134  // any calls that rely on the position of items on the stack. When we outline
5135  // something containing a call, we have to emit a save and restore of LR in
5136  // the outlined function. Currently, this always happens by saving LR to the
5137  // stack. Thus, if we outline, say, half the parameters for a function call
5138  // plus the call, then we'll break the callee's expectations for the layout
5139  // of the stack.
5140  //
5141  // FIXME: Allow calls to functions which construct a stack frame, as long
5142  // as they don't access arguments on the stack.
5143  // FIXME: Figure out some way to analyze functions defined in other modules.
5144  // We should be able to compute the memory usage based on the IR calling
5145  // convention, even if we can't see the definition.
5146  if (MI.isCall()) {
5147  // Get the function associated with the call. Look at each operand and find
5148  // the one that represents the callee and get its name.
5149  const Function *Callee = nullptr;
5150  for (const MachineOperand &MOP : MI.operands()) {
5151  if (MOP.isGlobal()) {
5152  Callee = dyn_cast<Function>(MOP.getGlobal());
5153  break;
5154  }
5155  }
5156 
5157  // Never outline calls to mcount. There isn't any rule that would require
5158  // this, but the Linux kernel's "ftrace" feature depends on it.
5159  if (Callee && Callee->getName() == "\01_mcount")
5160  return outliner::InstrType::Illegal;
5161 
5162  // If we don't know anything about the callee, assume it depends on the
5163  // stack layout of the caller. In that case, it's only legal to outline
5164  // as a tail-call. Whitelist the call instructions we know about so we
5165  // don't get unexpected results with call pseudo-instructions.
5166  auto UnknownCallOutlineType = outliner::InstrType::Illegal;
5167  if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
5168  UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
5169 
5170  if (!Callee)
5171  return UnknownCallOutlineType;
5172 
5173  // We have a function we have information about. Check if it's something
5174  // we can safely outline.
5175  MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
5176 
5177  // We don't know what's going on with the callee at all. Don't touch it.
5178  if (!CalleeMF)
5179  return UnknownCallOutlineType;
5180 
5181  // Check if we know anything about the callee saves on the function. If we
5182  // don't, then don't touch it, since that implies that we haven't
5183  // computed anything about its stack frame yet.
5184  MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
5185  if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
5186  MFI.getNumObjects() > 0)
5187  return UnknownCallOutlineType;
5188 
5189  // At this point, we can say that CalleeMF ought to not pass anything on the
5190  // stack. Therefore, we can outline it.
5191  return outliner::InstrType::Legal;
5192  }
5193 
5194  // Don't outline positions.
5195  if (MI.isPosition())
5196  return outliner::InstrType::Illegal;
5197 
5198  // Don't touch the link register or W30.
5199  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
5200  MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
5201  return outliner::InstrType::Illegal;
5202 
5203  // Does this use the stack?
5204  if (MI.modifiesRegister(AArch64::SP, &RI) ||
5205  MI.readsRegister(AArch64::SP, &RI)) {
5206  // True if there is no chance that any outlined candidate from this range
5207  // could require stack fixups. That is, both
5208  // * LR is available in the range (No save/restore around call)
5209  // * The range doesn't include calls (No save/restore in outlined frame)
5210  // are true.
5211  // FIXME: This is very restrictive; the flags check the whole block,
5212  // not just the bit we will try to outline.
5213  bool MightNeedStackFixUp =
5214  (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
5215  MachineOutlinerMBBFlags::HasCalls));
5216 
5217  // If this instruction is in a range where it *never* needs to be fixed
5218  // up, then we can *always* outline it. This is true even if it's not
5219  // possible to fix that instruction up.
5220  //
5221  // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
5222  // use SP. Suppose that I1 sits within a range that definitely doesn't
5223  // need stack fixups, while I2 sits in a range that does.
5224  //
5225  // First, I1 can be outlined as long as we *never* fix up the stack in
5226  // any sequence containing it. I1 is already a safe instruction in the
5227  // original program, so as long as we don't modify it we're good to go.
5228  // So this leaves us with showing that outlining I2 won't break our
5229  // program.
5230  //
5231  // Suppose I1 and I2 belong to equivalent candidate sequences. When we
5232  // look at I2, we need to see if it can be fixed up. Suppose I2, (and
5233  // thus I1) cannot be fixed up. Then I2 will be assigned a unique
5234  // integer label; thus, I2 cannot belong to any candidate sequence (a
5235  // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
5236  // as well, so we're good. Thus, I1 is always safe to outline.
5237  //
5238  // This gives us two things: first off, it buys us some more instructions
5239  // for our search space by deeming stack instructions illegal only when
5240  // they can't be fixed up AND we might have to fix them up. Second off,
5241  // this allows us to catch tricky instructions like, say,
5242  // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
5243  // be paired with later SUBXris, which might *not* end up being outlined.
5244  // If we mess with the stack to save something, then an ADDXri messes with
5245  // it *after*, then we aren't going to restore the right something from
5246  // the stack if we don't outline the corresponding SUBXri first. ADDXris and
5247  // SUBXris are extremely common in prologue/epilogue code, so supporting
5248  // them in the outliner can be a pretty big win!
5249  if (!MightNeedStackFixUp)
5250  return outliner::InstrType::Legal;
5251 
5252  // Any modification of SP will break our code to save/restore LR.
5253  // FIXME: We could handle some instructions which add a constant offset to
5254  // SP, with a bit more work.
5255  if (MI.modifiesRegister(AArch64::SP, &RI))
5256  return outliner::InstrType::Illegal;
5257 
5258  // At this point, we have a stack instruction that we might need to fix
5259  // up. We'll handle it if it's a load or store.
5260  if (MI.mayLoadOrStore()) {
5261  unsigned Base; // Filled with the base register of MI.
5262  int64_t Offset; // Filled with the offset of MI.
5263  unsigned DummyWidth;
5264 
5265  // Does it allow us to offset the base register and is the base SP?
5266  if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
5267  Base != AArch64::SP)
5268  return outliner::InstrType::Illegal;
5269 
5270  // Find the minimum/maximum offset for this instruction and check if
5271  // fixing it up would be in range.
5272  int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
5273  unsigned Scale; // The scale to multiply the offsets by.
5274  getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
5275 
5276  // TODO: We should really test what happens if an instruction overflows.
5277  // This is tricky to test with IR tests, but when the outliner is moved
5278  // to a MIR test, it really ought to be checked.
5279  Offset += 16; // Update the offset to what it would be if we outlined.
5280  if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
5281  return outliner::InstrType::Illegal;
5282 
5283  // It's in range, so we can outline it.
5284  return outliner::InstrType::Legal;
5285  }
5286 
5287  // FIXME: Add handling for instructions like "add x0, sp, #8".
5288 
5289  // We can't fix it up, so don't outline it.
5290  return outliner::InstrType::Illegal;
5291  }
5292 
5293  return outliner::InstrType::Legal;
5294 }
5295 
5296 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5297  for (MachineInstr &MI : MBB) {
5298  unsigned Base, Width;
5299  int64_t Offset;
5300 
5301  // Is this a load or store with an immediate offset with SP as the base?
5302  if (!MI.mayLoadOrStore() ||
5303  !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
5304  Base != AArch64::SP)
5305  continue;
5306 
5307  // It is, so we have to fix it up.
5308  unsigned Scale;
5309  int64_t Dummy1, Dummy2;
5310 
5311  MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5312  assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5313  getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5314  assert(Scale != 0 && "Unexpected opcode!");
5315 
5316  // We've pushed the return address to the stack, so add 16 to the offset.
5317  // This is safe, since we already checked if it would overflow when we
5318  // checked if this instruction was legal to outline.
5319  int64_t NewImm = (Offset + 16) / Scale;
5320  StackOffsetOperand.setImm(NewImm);
5321  }
5322 }
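// For example, a stack access inside the outlined body such as
// \code
//   ldr x9, [sp, #8]
// \endcode
// is rewritten here to
// \code
//   ldr x9, [sp, #24]
// \endcode
// because the save of LR emitted for the outlined frame or call moves SP
// down by 16 bytes.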
5323 
5324 void AArch64InstrInfo::insertOutlinerEpilogue(
5325  MachineBasicBlock &MBB, MachineFunction &MF,
5326  const outliner::TargetCostInfo &TCI) const {
5327  // For thunk outlining, rewrite the last instruction from a call to a
5328  // tail-call.
5329  if (TCI.FrameConstructionID == MachineOutlinerThunk) {
5330  MachineInstr *Call = &*--MBB.instr_end();
5331  unsigned TailOpcode;
5332  if (Call->getOpcode() == AArch64::BL) {
5333  TailOpcode = AArch64::TCRETURNdi;
5334  } else {
5335  assert(Call->getOpcode() == AArch64::BLR);
5336  TailOpcode = AArch64::TCRETURNri;
5337  }
5338  MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
5339  .add(Call->getOperand(0))
5340  .addImm(0);
5341  MBB.insert(MBB.end(), TC);
5342  Call->eraseFromParent();
5343  }
5344 
5345  // Is there a call in the outlined range?
5346  auto IsNonTailCall = [](MachineInstr &MI) {
5347  return MI.isCall() && !MI.isReturn();
5348  };
5349  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
5350  // Fix up the instructions in the range, since we're going to modify the
5351  // stack.
5352  assert(TCI.FrameConstructionID != MachineOutlinerDefault &&
5353  "Can only fix up stack references once");
5354  fixupPostOutline(MBB);
5355 
5356  // LR has to be a live in so that we can save it.
5357  MBB.addLiveIn(AArch64::LR);
5358 
5359  MachineBasicBlock::iterator It = MBB.begin();
5360  MachineBasicBlock::iterator Et = MBB.end();
5361 
5362  if (TCI.FrameConstructionID == MachineOutlinerTailCall ||
5363  TCI.FrameConstructionID == MachineOutlinerThunk)
5364  Et = std::prev(MBB.end());
5365 
5366  // Insert a save before the outlined region
5367  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5368  .addReg(AArch64::SP, RegState::Define)
5369  .addReg(AArch64::LR)
5370  .addReg(AArch64::SP)
5371  .addImm(-16);
5372  It = MBB.insert(It, STRXpre);
5373 
5374  const TargetSubtargetInfo &STI = MF.getSubtarget();
5375  const MCRegisterInfo *MRI = STI.getRegisterInfo();
5376  unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
5377 
5378  // Add a CFI saying the stack was moved 16 B down.
5379  int64_t StackPosEntry =
5380  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
5381  BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5382  .addCFIIndex(StackPosEntry)
5383  .setMIFlags(MachineInstr::FrameSetup);
5384 
5385  // Add a CFI saying that the LR that we want to find is now 16 B higher than
5386  // before.
5387  int64_t LRPosEntry =
5388  MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
5389  BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5390  .addCFIIndex(LRPosEntry)
5391  .setMIFlags(MachineInstr::FrameSetup);
5392 
5393  // Insert a restore before the terminator for the function.
5394  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5395  .addReg(AArch64::SP, RegState::Define)
5396  .addReg(AArch64::LR, RegState::Define)
5397  .addReg(AArch64::SP)
5398  .addImm(16);
5399  Et = MBB.insert(Et, LDRXpost);
5400  }
5401 
5402  // If this is a tail call outlined function, then there's already a return.
5403  if (TCI.FrameConstructionID == MachineOutlinerTailCall ||
5404  TCI.FrameConstructionID == MachineOutlinerThunk)
5405  return;
5406 
5407  // It's not a tail call, so we have to insert the return ourselves.
5408  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5409  .addReg(AArch64::LR, RegState::Undef);
5410  MBB.insert(MBB.end(), ret);
5411 
5412  // Did we have to modify the stack by saving the link register?
5413  if (TCI.FrameConstructionID != MachineOutlinerDefault)
5414  return;
5415 
5416  // We modified the stack.
5417  // Walk over the basic block and fix up all the stack accesses.
5418  fixupPostOutline(MBB);
5419 }
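// Concretely, when the outlined body itself contains a call, the code above
// brackets it roughly as:
// \code
// OUTLINED_FUNCTION:
//   str x30, [sp, #-16]!   ; STRXpre: save LR, SP -= 16
//   ...                    ; outlined body, SP-relative offsets fixed up
//   ldr x30, [sp], #16     ; LDRXpost: restore LR
//   ret                    ; added unless this is a tail-call/thunk frame
// \endcode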
5420 
5421 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5422  Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
5423  MachineFunction &MF, const outliner::TargetCostInfo &TCI) const {
5424 
5425  // Are we tail calling?
5426  if (TCI.CallConstructionID == MachineOutlinerTailCall) {
5427  // If yes, then we can just branch to the label.
5428  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
5429  .addGlobalAddress(M.getNamedValue(MF.getName()))
5430  .addImm(0));
5431  return It;
5432  }
5433 
5434  // Are we saving the link register?
5435  if (TCI.CallConstructionID == MachineOutlinerNoLRSave ||
5436  TCI.CallConstructionID == MachineOutlinerThunk) {
5437  // No, so just insert the call.
5438  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5439  .addGlobalAddress(M.getNamedValue(MF.getName())));
5440  return It;
5441  }
5442 
5443  // We want to return the spot where we inserted the call.
5444  MachineBasicBlock::iterator CallPt;
5445 
5446  // We have a default call. Save the link register.
5447  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5448  .addReg(AArch64::SP, RegState::Define)
5449  .addReg(AArch64::LR)
5450  .addReg(AArch64::SP)
5451  .addImm(-16);
5452  It = MBB.insert(It, STRXpre);
5453  It++;
5454 
5455  // Insert the call.
5456  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5457  .addGlobalAddress(M.getNamedValue(MF.getName())));
5458  CallPt = It;
5459  It++;
5460 
5461  // Restore the link register.
5462  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5463  .addReg(AArch64::SP, RegState::Define)
5464  .addReg(AArch64::LR, RegState::Define)
5465  .addReg(AArch64::SP)
5466  .addImm(16);
5467  It = MBB.insert(It, LDRXpost);
5468 
5469  return CallPt;
5470 }
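// At a call site, the three call shapes emitted above look roughly like:
// \code
//   b   OUTLINED_FUNCTION_N      ; MachineOutlinerTailCall
//   bl  OUTLINED_FUNCTION_N      ; MachineOutlinerNoLRSave / Thunk
//   str x30, [sp, #-16]!         ; MachineOutlinerDefault:
//   bl  OUTLINED_FUNCTION_N      ;   save LR, call,
//   ldr x30, [sp], #16           ;   then restore LR
// \endcode
// (OUTLINED_FUNCTION_N is the generic outliner's naming scheme.)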