LLVM  9.0.0svn
AArch64InstrInfo.cpp
1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
15 #include "AArch64Subtarget.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/CodeGen/StackMaps.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/MC/MCInst.h"
36 #include "llvm/MC/MCInstrDesc.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/CodeGen.h"
40 #include "llvm/Support/Compiler.h"
45 #include <cassert>
46 #include <cstdint>
47 #include <iterator>
48 #include <utility>
49 
50 using namespace llvm;
51 
52 #define GET_INSTRINFO_CTOR_DTOR
53 #include "AArch64GenInstrInfo.inc"
54 
55 static cl::opt<unsigned> TBZDisplacementBits(
56  "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
57  cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
58 
59 static cl::opt<unsigned> CBZDisplacementBits(
60  "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
61  cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
62 
63 static cl::opt<unsigned>
64  BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
65  cl::desc("Restrict range of Bcc instructions (DEBUG)"));
66 
67 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
68  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
69  AArch64::CATCHRET),
70  RI(STI.getTargetTriple()), Subtarget(STI) {}
71 
72 /// getInstSizeInBytes - Return the number of bytes of code the specified
73 /// instruction may occupy. This returns the maximum number of bytes.
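/// Note: apart from inline asm, patchpoints/stackmaps and the pseudos handled
/// explicitly below, every AArch64 instruction is a fixed 4 bytes.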
74 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
75  const MachineBasicBlock &MBB = *MI.getParent();
76  const MachineFunction *MF = MBB.getParent();
77  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
78 
79  if (MI.getOpcode() == AArch64::INLINEASM)
80  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
81 
82  // FIXME: We currently only handle pseudoinstructions that don't get expanded
83  // before the assembly printer.
84  unsigned NumBytes = 0;
85  const MCInstrDesc &Desc = MI.getDesc();
86  switch (Desc.getOpcode()) {
87  default:
88  // Anything not explicitly designated otherwise is a normal 4-byte insn.
89  NumBytes = 4;
90  break;
91  case TargetOpcode::DBG_VALUE:
93  case TargetOpcode::IMPLICIT_DEF:
94  case TargetOpcode::KILL:
95  NumBytes = 0;
96  break;
97  case TargetOpcode::STACKMAP:
98  // The upper bound for a stackmap intrinsic is the full length of its shadow
99  NumBytes = StackMapOpers(&MI).getNumPatchBytes();
100  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
101  break;
102  case TargetOpcode::PATCHPOINT:
103  // The size of the patchpoint intrinsic is the number of bytes requested
104  NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
105  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
106  break;
107  case AArch64::TLSDESC_CALLSEQ:
108  // This gets lowered to an instruction sequence which takes 16 bytes.
109  NumBytes = 16;
110  break;
111  case AArch64::JumpTableDest32:
112  case AArch64::JumpTableDest16:
113  case AArch64::JumpTableDest8:
114  NumBytes = 12;
115  break;
116  case AArch64::SPACE:
117  NumBytes = MI.getOperand(1).getImm();
118  break;
119  }
120 
121  return NumBytes;
122 }
123 
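// parseCondBranch fills in Target and encodes the branch condition in Cond:
//   Bcc:      Cond = { <cc> }
//   CB(N)Z:   Cond = { -1, <opcode>, <reg> }
//   TB(N)Z:   Cond = { -1, <opcode>, <reg>, <bit> }
// The leading -1 marks a folded compare-and-branch; see reverseBranchCondition
// and insertSelect below for how this encoding is consumed.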
124 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
125  SmallVectorImpl<MachineOperand> &Cond) {
126  // Block ends with fall-through condbranch.
127  switch (LastInst->getOpcode()) {
128  default:
129  llvm_unreachable("Unknown branch instruction?");
130  case AArch64::Bcc:
131  Target = LastInst->getOperand(1).getMBB();
132  Cond.push_back(LastInst->getOperand(0));
133  break;
134  case AArch64::CBZW:
135  case AArch64::CBZX:
136  case AArch64::CBNZW:
137  case AArch64::CBNZX:
138  Target = LastInst->getOperand(1).getMBB();
139  Cond.push_back(MachineOperand::CreateImm(-1));
140  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
141  Cond.push_back(LastInst->getOperand(0));
142  break;
143  case AArch64::TBZW:
144  case AArch64::TBZX:
145  case AArch64::TBNZW:
146  case AArch64::TBNZX:
147  Target = LastInst->getOperand(2).getMBB();
148  Cond.push_back(MachineOperand::CreateImm(-1));
149  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
150  Cond.push_back(LastInst->getOperand(0));
151  Cond.push_back(LastInst->getOperand(1));
152  }
153 }
154 
155 static unsigned getBranchDisplacementBits(unsigned Opc) {
156  switch (Opc) {
157  default:
158  llvm_unreachable("unexpected opcode!");
159  case AArch64::B:
160  return 64;
161  case AArch64::TBNZW:
162  case AArch64::TBZW:
163  case AArch64::TBNZX:
164  case AArch64::TBZX:
165  return TBZDisplacementBits;
166  case AArch64::CBNZW:
167  case AArch64::CBZW:
168  case AArch64::CBNZX:
169  case AArch64::CBZX:
170  return CBZDisplacementBits;
171  case AArch64::Bcc:
172  return BCCDisplacementBits;
173  }
174 }
175 
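// Branch offsets are given in bytes, but the instruction encodings count
// 4-byte instruction words, hence the BrOffset / 4 below.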
176 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
177  int64_t BrOffset) const {
178  unsigned Bits = getBranchDisplacementBits(BranchOp);
179  assert(Bits >= 3 && "max branch displacement must be enough to jump "
180  "over conditional branch expansion");
181  return isIntN(Bits, BrOffset / 4);
182 }
183 
184 MachineBasicBlock *
185 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
186  switch (MI.getOpcode()) {
187  default:
188  llvm_unreachable("unexpected opcode!");
189  case AArch64::B:
190  return MI.getOperand(0).getMBB();
191  case AArch64::TBZW:
192  case AArch64::TBNZW:
193  case AArch64::TBZX:
194  case AArch64::TBNZX:
195  return MI.getOperand(2).getMBB();
196  case AArch64::CBZW:
197  case AArch64::CBNZW:
198  case AArch64::CBZX:
199  case AArch64::CBNZX:
200  case AArch64::Bcc:
201  return MI.getOperand(1).getMBB();
202  }
203 }
204 
205 // Branch analysis.
206 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
207  MachineBasicBlock *&TBB,
208  MachineBasicBlock *&FBB,
209  SmallVectorImpl<MachineOperand> &Cond,
210  bool AllowModify) const {
211  // If the block has no terminators, it just falls into the block after it.
212  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
213  if (I == MBB.end())
214  return false;
215 
216  if (!isUnpredicatedTerminator(*I))
217  return false;
218 
219  // Get the last instruction in the block.
220  MachineInstr *LastInst = &*I;
221 
222  // If there is only one terminator instruction, process it.
223  unsigned LastOpc = LastInst->getOpcode();
224  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
225  if (isUncondBranchOpcode(LastOpc)) {
226  TBB = LastInst->getOperand(0).getMBB();
227  return false;
228  }
229  if (isCondBranchOpcode(LastOpc)) {
230  // Block ends with fall-through condbranch.
231  parseCondBranch(LastInst, TBB, Cond);
232  return false;
233  }
234  return true; // Can't handle indirect branch.
235  }
236 
237  // Get the instruction before it if it is a terminator.
238  MachineInstr *SecondLastInst = &*I;
239  unsigned SecondLastOpc = SecondLastInst->getOpcode();
240 
241  // If AllowModify is true and the block ends with two or more unconditional
242  // branches, delete all but the first unconditional branch.
243  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
244  while (isUncondBranchOpcode(SecondLastOpc)) {
245  LastInst->eraseFromParent();
246  LastInst = SecondLastInst;
247  LastOpc = LastInst->getOpcode();
248  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
249  // Return now; the only terminator is an unconditional branch.
250  TBB = LastInst->getOperand(0).getMBB();
251  return false;
252  } else {
253  SecondLastInst = &*I;
254  SecondLastOpc = SecondLastInst->getOpcode();
255  }
256  }
257  }
258 
259  // If there are three terminators, we don't know what sort of block this is.
260  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
261  return true;
262 
263  // If the block ends with a B and a Bcc, handle it.
264  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
265  parseCondBranch(SecondLastInst, TBB, Cond);
266  FBB = LastInst->getOperand(0).getMBB();
267  return false;
268  }
269 
270  // If the block ends with two unconditional branches, handle it. The second
271  // one is not executed, so remove it.
272  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
273  TBB = SecondLastInst->getOperand(0).getMBB();
274  I = LastInst;
275  if (AllowModify)
276  I->eraseFromParent();
277  return false;
278  }
279 
280  // ...likewise if it ends with an indirect branch followed by an unconditional
281  // branch.
282  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
283  I = LastInst;
284  if (AllowModify)
285  I->eraseFromParent();
286  return true;
287  }
288 
289  // Otherwise, can't handle this.
290  return true;
291 }
292 
293 bool AArch64InstrInfo::reverseBranchCondition(
294  SmallVectorImpl<MachineOperand> &Cond) const {
295  if (Cond[0].getImm() != -1) {
296  // Regular Bcc
297  AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
298  Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
299  } else {
300  // Folded compare-and-branch
301  switch (Cond[1].getImm()) {
302  default:
303  llvm_unreachable("Unknown conditional branch!");
304  case AArch64::CBZW:
305  Cond[1].setImm(AArch64::CBNZW);
306  break;
307  case AArch64::CBNZW:
308  Cond[1].setImm(AArch64::CBZW);
309  break;
310  case AArch64::CBZX:
311  Cond[1].setImm(AArch64::CBNZX);
312  break;
313  case AArch64::CBNZX:
314  Cond[1].setImm(AArch64::CBZX);
315  break;
316  case AArch64::TBZW:
317  Cond[1].setImm(AArch64::TBNZW);
318  break;
319  case AArch64::TBNZW:
320  Cond[1].setImm(AArch64::TBZW);
321  break;
322  case AArch64::TBZX:
323  Cond[1].setImm(AArch64::TBNZX);
324  break;
325  case AArch64::TBNZX:
326  Cond[1].setImm(AArch64::TBZX);
327  break;
328  }
329  }
330 
331  return false;
332 }
333 
334 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
335  int *BytesRemoved) const {
336  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
337  if (I == MBB.end())
338  return 0;
339 
340  if (!isUncondBranchOpcode(I->getOpcode()) &&
341  !isCondBranchOpcode(I->getOpcode()))
342  return 0;
343 
344  // Remove the branch.
345  I->eraseFromParent();
346 
347  I = MBB.end();
348 
349  if (I == MBB.begin()) {
350  if (BytesRemoved)
351  *BytesRemoved = 4;
352  return 1;
353  }
354  --I;
355  if (!isCondBranchOpcode(I->getOpcode())) {
356  if (BytesRemoved)
357  *BytesRemoved = 4;
358  return 1;
359  }
360 
361  // Remove the branch.
362  I->eraseFromParent();
363  if (BytesRemoved)
364  *BytesRemoved = 8;
365 
366  return 2;
367 }
368 
369 void AArch64InstrInfo::instantiateCondBranch(
370  MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
371  ArrayRef<MachineOperand> Cond) const {
372  if (Cond[0].getImm() != -1) {
373  // Regular Bcc
374  BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
375  } else {
376  // Folded compare-and-branch
377  // Note that we use add() instead of addReg() to keep the operand flags.
378  const MachineInstrBuilder MIB =
379  BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
380  if (Cond.size() > 3)
381  MIB.addImm(Cond[3].getImm());
382  MIB.addMBB(TBB);
383  }
384 }
385 
386 unsigned AArch64InstrInfo::insertBranch(
387  MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
388  ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
389  // Shouldn't be a fall through.
390  assert(TBB && "insertBranch must not be told to insert a fallthrough");
391 
392  if (!FBB) {
393  if (Cond.empty()) // Unconditional branch?
394  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
395  else
396  instantiateCondBranch(MBB, DL, TBB, Cond);
397 
398  if (BytesAdded)
399  *BytesAdded = 4;
400 
401  return 1;
402  }
403 
404  // Two-way conditional branch.
405  instantiateCondBranch(MBB, DL, TBB, Cond);
406  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
407 
408  if (BytesAdded)
409  *BytesAdded = 8;
410 
411  return 2;
412 }
413 
414 // Find the original register that VReg is copied from.
415 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
416  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
417  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
418  if (!DefMI->isFullCopy())
419  return VReg;
420  VReg = DefMI->getOperand(1).getReg();
421  }
422  return VReg;
423 }
424 
425 // Determine if VReg is defined by an instruction that can be folded into a
426 // csel instruction. If so, return the folded opcode, and the replacement
427 // register.
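// For example, an "add Wd, Wn, #1" that defines one of the select inputs can
// be folded into a CSINC, and "orn"/"sub" from the zero register (i.e. not/neg)
// fold into CSINV/CSNEG; see the cases below.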
428 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
429  unsigned *NewVReg = nullptr) {
430  VReg = removeCopies(MRI, VReg);
431  if (!TargetRegisterInfo::isVirtualRegister(VReg))
432  return 0;
433 
434  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
435  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
436  unsigned Opc = 0;
437  unsigned SrcOpNum = 0;
438  switch (DefMI->getOpcode()) {
439  case AArch64::ADDSXri:
440  case AArch64::ADDSWri:
441  // if NZCV is used, do not fold.
442  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
443  return 0;
444  // fall-through to ADDXri and ADDWri.
445  LLVM_FALLTHROUGH;
446  case AArch64::ADDXri:
447  case AArch64::ADDWri:
448  // add x, 1 -> csinc.
449  if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
450  DefMI->getOperand(3).getImm() != 0)
451  return 0;
452  SrcOpNum = 1;
453  Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
454  break;
455 
456  case AArch64::ORNXrr:
457  case AArch64::ORNWrr: {
458  // not x -> csinv, represented as orn dst, xzr, src.
459  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
460  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
461  return 0;
462  SrcOpNum = 2;
463  Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
464  break;
465  }
466 
467  case AArch64::SUBSXrr:
468  case AArch64::SUBSWrr:
469  // if NZCV is used, do not fold.
470  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
471  return 0;
472  // fall-through to SUBXrr and SUBWrr.
473  LLVM_FALLTHROUGH;
474  case AArch64::SUBXrr:
475  case AArch64::SUBWrr: {
476  // neg x -> csneg, represented as sub dst, xzr, src.
477  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
478  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
479  return 0;
480  SrcOpNum = 2;
481  Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
482  break;
483  }
484  default:
485  return 0;
486  }
487  assert(Opc && SrcOpNum && "Missing parameters");
488 
489  if (NewVReg)
490  *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
491  return Opc;
492 }
493 
494 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
495  ArrayRef<MachineOperand> Cond,
496  unsigned TrueReg, unsigned FalseReg,
497  int &CondCycles, int &TrueCycles,
498  int &FalseCycles) const {
499  // Check register classes.
500  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
501  const TargetRegisterClass *RC =
502  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
503  if (!RC)
504  return false;
505 
506  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
507  unsigned ExtraCondLat = Cond.size() != 1;
508 
509  // GPRs are handled by csel.
510  // FIXME: Fold in x+1, -x, and ~x when applicable.
511  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
512  AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
513  // Single-cycle csel, csinc, csinv, and csneg.
514  CondCycles = 1 + ExtraCondLat;
515  TrueCycles = FalseCycles = 1;
516  if (canFoldIntoCSel(MRI, TrueReg))
517  TrueCycles = 0;
518  else if (canFoldIntoCSel(MRI, FalseReg))
519  FalseCycles = 0;
520  return true;
521  }
522 
523  // Scalar floating point is handled by fcsel.
524  // FIXME: Form fabs, fmin, and fmax when applicable.
525  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
526  AArch64::FPR32RegClass.hasSubClassEq(RC)) {
527  CondCycles = 5 + ExtraCondLat;
528  TrueCycles = FalseCycles = 2;
529  return true;
530  }
531 
532  // Can't do vectors.
533  return false;
534 }
535 
536 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
537  MachineBasicBlock::iterator I,
538  const DebugLoc &DL, unsigned DstReg,
539  ArrayRef<MachineOperand> Cond,
540  unsigned TrueReg, unsigned FalseReg) const {
541  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
542
543  // Parse the condition code, see parseCondBranch() above.
544  AArch64CC::CondCode CC;
545  switch (Cond.size()) {
546  default:
547  llvm_unreachable("Unknown condition opcode in Cond");
548  case 1: // b.cc
549  CC = AArch64CC::CondCode(Cond[0].getImm());
550  break;
551  case 3: { // cbz/cbnz
552  // We must insert a compare against 0.
553  bool Is64Bit;
554  switch (Cond[1].getImm()) {
555  default:
556  llvm_unreachable("Unknown branch opcode in Cond");
557  case AArch64::CBZW:
558  Is64Bit = false;
559  CC = AArch64CC::EQ;
560  break;
561  case AArch64::CBZX:
562  Is64Bit = true;
563  CC = AArch64CC::EQ;
564  break;
565  case AArch64::CBNZW:
566  Is64Bit = false;
567  CC = AArch64CC::NE;
568  break;
569  case AArch64::CBNZX:
570  Is64Bit = true;
571  CC = AArch64CC::NE;
572  break;
573  }
574  unsigned SrcReg = Cond[2].getReg();
575  if (Is64Bit) {
576  // cmp reg, #0 is actually subs xzr, reg, #0.
577  MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
578  BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
579  .addReg(SrcReg)
580  .addImm(0)
581  .addImm(0);
582  } else {
583  MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
584  BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
585  .addReg(SrcReg)
586  .addImm(0)
587  .addImm(0);
588  }
589  break;
590  }
591  case 4: { // tbz/tbnz
592  // We must insert a tst instruction.
593  switch (Cond[1].getImm()) {
594  default:
595  llvm_unreachable("Unknown branch opcode in Cond");
596  case AArch64::TBZW:
597  case AArch64::TBZX:
598  CC = AArch64CC::EQ;
599  break;
600  case AArch64::TBNZW:
601  case AArch64::TBNZX:
602  CC = AArch64CC::NE;
603  break;
604  }
605  // tst reg, #(1 << foo) is actually ands xzr, reg, #(1 << foo).
606  if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
607  BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
608  .addReg(Cond[2].getReg())
609  .addImm(
610  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
611  else
612  BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
613  .addReg(Cond[2].getReg())
614  .addImm(
615  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
616  break;
617  }
618  }
619 
620  unsigned Opc = 0;
621  const TargetRegisterClass *RC = nullptr;
622  bool TryFold = false;
623  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
624  RC = &AArch64::GPR64RegClass;
625  Opc = AArch64::CSELXr;
626  TryFold = true;
627  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
628  RC = &AArch64::GPR32RegClass;
629  Opc = AArch64::CSELWr;
630  TryFold = true;
631  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
632  RC = &AArch64::FPR64RegClass;
633  Opc = AArch64::FCSELDrrr;
634  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
635  RC = &AArch64::FPR32RegClass;
636  Opc = AArch64::FCSELSrrr;
637  }
638  assert(RC && "Unsupported regclass");
639 
640  // Try folding simple instructions into the csel.
641  if (TryFold) {
642  unsigned NewVReg = 0;
643  unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
644  if (FoldedOpc) {
645  // The folded opcodes csinc, csinv and csneg apply the operation to
646  // FalseReg, so we need to invert the condition.
647  CC = AArch64CC::getInvertedCondCode(CC);
648  TrueReg = FalseReg;
649  } else
650  FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
651 
652  // Fold the operation. Leave any dead instructions for DCE to clean up.
653  if (FoldedOpc) {
654  FalseReg = NewVReg;
655  Opc = FoldedOpc;
656  // This extends the live range of NewVReg.
657  MRI.clearKillFlags(NewVReg);
658  }
659  }
660 
661  // Pull all virtual registers into the appropriate class.
662  MRI.constrainRegClass(TrueReg, RC);
663  MRI.constrainRegClass(FalseReg, RC);
664 
665  // Insert the csel.
666  BuildMI(MBB, I, DL, get(Opc), DstReg)
667  .addReg(TrueReg)
668  .addReg(FalseReg)
669  .addImm(CC);
670 }
671 
672 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
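/// For example, 0x00FF00FF00FF00FF is a valid logical immediate (a repeating
/// 16-bit pattern), so a MOVi64imm of that constant can be emitted as a single
/// ORR from the zero register instead of a MOVZ/MOVK sequence.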
673 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
674  uint64_t Imm = MI.getOperand(1).getImm();
675  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
676  uint64_t Encoding;
677  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
678 }
679 
680 // FIXME: this implementation should be micro-architecture dependent, so a
681 // micro-architecture target hook should be introduced here in future.
682 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
683  if (!Subtarget.hasCustomCheapAsMoveHandling())
684  return MI.isAsCheapAsAMove();
685 
686  const unsigned Opcode = MI.getOpcode();
687 
688  // Firstly, check cases gated by features.
689 
690  if (Subtarget.hasZeroCycleZeroingFP()) {
691  if (Opcode == AArch64::FMOVH0 ||
692  Opcode == AArch64::FMOVS0 ||
693  Opcode == AArch64::FMOVD0)
694  return true;
695  }
696 
697  if (Subtarget.hasZeroCycleZeroingGP()) {
698  if (Opcode == TargetOpcode::COPY &&
699  (MI.getOperand(1).getReg() == AArch64::WZR ||
700  MI.getOperand(1).getReg() == AArch64::XZR))
701  return true;
702  }
703 
704  // Secondly, check cases specific to sub-targets.
705 
706  if (Subtarget.hasExynosCheapAsMoveHandling()) {
707  if (isExynosCheapAsMove(MI))
708  return true;
709 
710  return MI.isAsCheapAsAMove();
711  }
712 
713  // Finally, check generic cases.
714 
715  switch (Opcode) {
716  default:
717  return false;
718 
719  // add/sub with immediate and no shift
720  case AArch64::ADDWri:
721  case AArch64::ADDXri:
722  case AArch64::SUBWri:
723  case AArch64::SUBXri:
724  return (MI.getOperand(3).getImm() == 0);
725 
726  // logical ops on immediate
727  case AArch64::ANDWri:
728  case AArch64::ANDXri:
729  case AArch64::EORWri:
730  case AArch64::EORXri:
731  case AArch64::ORRWri:
732  case AArch64::ORRXri:
733  return true;
734 
735  // logical ops on register without shift
736  case AArch64::ANDWrr:
737  case AArch64::ANDXrr:
738  case AArch64::BICWrr:
739  case AArch64::BICXrr:
740  case AArch64::EONWrr:
741  case AArch64::EONXrr:
742  case AArch64::EORWrr:
743  case AArch64::EORXrr:
744  case AArch64::ORNWrr:
745  case AArch64::ORNXrr:
746  case AArch64::ORRWrr:
747  case AArch64::ORRXrr:
748  return true;
749 
750  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
751  // ORRXri, it is as cheap as MOV
752  case AArch64::MOVi32imm:
753  return canBeExpandedToORR(MI, 32);
754  case AArch64::MOVi64imm:
755  return canBeExpandedToORR(MI, 64);
756  }
757 
758  llvm_unreachable("Unknown opcode to check as cheap as a move!");
759 }
760 
761 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
762  switch (MI.getOpcode()) {
763  default:
764  return false;
765 
766  case AArch64::ADDWrs:
767  case AArch64::ADDXrs:
768  case AArch64::ADDSWrs:
769  case AArch64::ADDSXrs: {
770  unsigned Imm = MI.getOperand(3).getImm();
771  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
772  if (ShiftVal == 0)
773  return true;
774  return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
775  }
776 
777  case AArch64::ADDWrx:
778  case AArch64::ADDXrx:
779  case AArch64::ADDXrx64:
780  case AArch64::ADDSWrx:
781  case AArch64::ADDSXrx:
782  case AArch64::ADDSXrx64: {
783  unsigned Imm = MI.getOperand(3).getImm();
784  switch (AArch64_AM::getArithExtendType(Imm)) {
785  default:
786  return false;
787  case AArch64_AM::UXTB:
788  case AArch64_AM::UXTH:
789  case AArch64_AM::UXTW:
790  case AArch64_AM::UXTX:
791  return AArch64_AM::getArithShiftValue(Imm) <= 4;
792  }
793  }
794 
795  case AArch64::SUBWrs:
796  case AArch64::SUBSWrs: {
797  unsigned Imm = MI.getOperand(3).getImm();
798  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
799  return ShiftVal == 0 ||
800  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
801  }
802 
803  case AArch64::SUBXrs:
804  case AArch64::SUBSXrs: {
805  unsigned Imm = MI.getOperand(3).getImm();
806  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
807  return ShiftVal == 0 ||
808  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
809  }
810 
811  case AArch64::SUBWrx:
812  case AArch64::SUBXrx:
813  case AArch64::SUBXrx64:
814  case AArch64::SUBSWrx:
815  case AArch64::SUBSXrx:
816  case AArch64::SUBSXrx64: {
817  unsigned Imm = MI.getOperand(3).getImm();
818  switch (AArch64_AM::getArithExtendType(Imm)) {
819  default:
820  return false;
821  case AArch64_AM::UXTB:
822  case AArch64_AM::UXTH:
823  case AArch64_AM::UXTW:
824  case AArch64_AM::UXTX:
825  return AArch64_AM::getArithShiftValue(Imm) == 0;
826  }
827  }
828 
829  case AArch64::LDRBBroW:
830  case AArch64::LDRBBroX:
831  case AArch64::LDRBroW:
832  case AArch64::LDRBroX:
833  case AArch64::LDRDroW:
834  case AArch64::LDRDroX:
835  case AArch64::LDRHHroW:
836  case AArch64::LDRHHroX:
837  case AArch64::LDRHroW:
838  case AArch64::LDRHroX:
839  case AArch64::LDRQroW:
840  case AArch64::LDRQroX:
841  case AArch64::LDRSBWroW:
842  case AArch64::LDRSBWroX:
843  case AArch64::LDRSBXroW:
844  case AArch64::LDRSBXroX:
845  case AArch64::LDRSHWroW:
846  case AArch64::LDRSHWroX:
847  case AArch64::LDRSHXroW:
848  case AArch64::LDRSHXroX:
849  case AArch64::LDRSWroW:
850  case AArch64::LDRSWroX:
851  case AArch64::LDRSroW:
852  case AArch64::LDRSroX:
853  case AArch64::LDRWroW:
854  case AArch64::LDRWroX:
855  case AArch64::LDRXroW:
856  case AArch64::LDRXroX:
857  case AArch64::PRFMroW:
858  case AArch64::PRFMroX:
859  case AArch64::STRBBroW:
860  case AArch64::STRBBroX:
861  case AArch64::STRBroW:
862  case AArch64::STRBroX:
863  case AArch64::STRDroW:
864  case AArch64::STRDroX:
865  case AArch64::STRHHroW:
866  case AArch64::STRHHroX:
867  case AArch64::STRHroW:
868  case AArch64::STRHroX:
869  case AArch64::STRQroW:
870  case AArch64::STRQroX:
871  case AArch64::STRSroW:
872  case AArch64::STRSroX:
873  case AArch64::STRWroW:
874  case AArch64::STRWroX:
875  case AArch64::STRXroW:
876  case AArch64::STRXroX: {
877  unsigned IsSigned = MI.getOperand(3).getImm();
878  return !IsSigned;
879  }
880  }
881 }
882 
883 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
884  unsigned Opc = MI.getOpcode();
885  switch (Opc) {
886  default:
887  return false;
888  case AArch64::SEH_StackAlloc:
889  case AArch64::SEH_SaveFPLR:
890  case AArch64::SEH_SaveFPLR_X:
891  case AArch64::SEH_SaveReg:
892  case AArch64::SEH_SaveReg_X:
893  case AArch64::SEH_SaveRegP:
894  case AArch64::SEH_SaveRegP_X:
895  case AArch64::SEH_SaveFReg:
896  case AArch64::SEH_SaveFReg_X:
897  case AArch64::SEH_SaveFRegP:
898  case AArch64::SEH_SaveFRegP_X:
899  case AArch64::SEH_SetFP:
900  case AArch64::SEH_AddFP:
901  case AArch64::SEH_Nop:
902  case AArch64::SEH_PrologEnd:
903  case AArch64::SEH_EpilogStart:
904  case AArch64::SEH_EpilogEnd:
905  return true;
906  }
907 }
908 
909 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
910  unsigned &SrcReg, unsigned &DstReg,
911  unsigned &SubIdx) const {
912  switch (MI.getOpcode()) {
913  default:
914  return false;
915  case AArch64::SBFMXri: // aka sxtw
916  case AArch64::UBFMXri: // aka uxtw
917  // Check for the 32 -> 64 bit extension case, these instructions can do
918  // much more.
919  if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
920  return false;
921  // This is a signed or unsigned 32 -> 64 bit extension.
922  SrcReg = MI.getOperand(1).getReg();
923  DstReg = MI.getOperand(0).getReg();
924  SubIdx = AArch64::sub_32;
925  return true;
926  }
927 }
928 
929 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
930  const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const {
931  const TargetRegisterInfo *TRI = &getRegisterInfo();
932  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
933  int64_t OffsetA = 0, OffsetB = 0;
934  unsigned WidthA = 0, WidthB = 0;
935 
936  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
937  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
938 
939  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
940  MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
941  return false;
942 
943  // Retrieve the base, offset from the base and width. Width is the size of
944  // memory that is being loaded/stored (e.g. 1, 2, 4, 8). If the bases are
945  // identical, and the offset of the lower memory access plus its width does
946  // not overlap the offset of the higher memory access, then the two
947  // accesses are disjoint.
948  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
949  getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
950  if (BaseOpA->isIdenticalTo(*BaseOpB)) {
951  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
952  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
953  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
954  if (LowOffset + LowWidth <= HighOffset)
955  return true;
956  }
957  }
958  return false;
959 }
960 
961 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
962  const MachineBasicBlock *MBB,
963  const MachineFunction &MF) const {
964  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
965  return true;
966  switch (MI.getOpcode()) {
967  case AArch64::HINT:
968  // CSDB hints are scheduling barriers.
969  if (MI.getOperand(0).getImm() == 0x14)
970  return true;
971  break;
972  case AArch64::DSB:
973  case AArch64::ISB:
974  // DSB and ISB also are scheduling barriers.
975  return true;
976  default:;
977  }
978  return isSEHInstruction(MI);
979 }
980 
981 /// analyzeCompare - For a comparison instruction, return the source registers
982 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
983 /// Return true if the comparison instruction can be analyzed.
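/// For example, "cmp w1, #5" (an alias of "subs wzr, w1, #5") yields
/// SrcReg = w1, SrcReg2 = 0 and, per the FIXME below, CmpValue = 1 because the
/// immediate is non-zero.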
984 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
985  unsigned &SrcReg2, int &CmpMask,
986  int &CmpValue) const {
987  // The first operand can be a frame index where we'd normally expect a
988  // register.
989  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
990  if (!MI.getOperand(1).isReg())
991  return false;
992 
993  switch (MI.getOpcode()) {
994  default:
995  break;
996  case AArch64::SUBSWrr:
997  case AArch64::SUBSWrs:
998  case AArch64::SUBSWrx:
999  case AArch64::SUBSXrr:
1000  case AArch64::SUBSXrs:
1001  case AArch64::SUBSXrx:
1002  case AArch64::ADDSWrr:
1003  case AArch64::ADDSWrs:
1004  case AArch64::ADDSWrx:
1005  case AArch64::ADDSXrr:
1006  case AArch64::ADDSXrs:
1007  case AArch64::ADDSXrx:
1008  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1009  SrcReg = MI.getOperand(1).getReg();
1010  SrcReg2 = MI.getOperand(2).getReg();
1011  CmpMask = ~0;
1012  CmpValue = 0;
1013  return true;
1014  case AArch64::SUBSWri:
1015  case AArch64::ADDSWri:
1016  case AArch64::SUBSXri:
1017  case AArch64::ADDSXri:
1018  SrcReg = MI.getOperand(1).getReg();
1019  SrcReg2 = 0;
1020  CmpMask = ~0;
1021  // FIXME: CmpValue is only recorded as 0 (zero immediate) or 1 (non-zero).
1022  CmpValue = MI.getOperand(2).getImm() != 0;
1023  return true;
1024  case AArch64::ANDSWri:
1025  case AArch64::ANDSXri:
1026  // ANDS does not use the same encoding scheme as the other xxxS
1027  // instructions.
1028  SrcReg = MI.getOperand(1).getReg();
1029  SrcReg2 = 0;
1030  CmpMask = ~0;
1031  // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
1032  // while the type of CmpValue is int. When converting uint64_t to int,
1033  // the high 32 bits of uint64_t will be lost.
1034  // In fact it causes a bug in spec2006-483.xalancbmk;
1035  // CmpValue is only used to compare with zero in optimizeCompareInstr.
1036  CmpValue = AArch64_AM::decodeLogicalImmediate(
1037  MI.getOperand(2).getImm(),
1038  MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1039  return true;
1040  }
1041 
1042  return false;
1043 }
1044 
1045 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1046  MachineBasicBlock *MBB = Instr.getParent();
1047  assert(MBB && "Can't get MachineBasicBlock here");
1048  MachineFunction *MF = MBB->getParent();
1049  assert(MF && "Can't get MachineFunction here");
1050  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1051  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1052  MachineRegisterInfo *MRI = &MF->getRegInfo();
1053
1054  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1055  ++OpIdx) {
1056  MachineOperand &MO = Instr.getOperand(OpIdx);
1057  const TargetRegisterClass *OpRegCstraints =
1058  Instr.getRegClassConstraint(OpIdx, TII, TRI);
1059 
1060  // If there's no constraint, there's nothing to do.
1061  if (!OpRegCstraints)
1062  continue;
1063  // If the operand is a frame index, there's nothing to do here.
1064  // A frame index operand will resolve correctly during PEI.
1065  if (MO.isFI())
1066  continue;
1067 
1068  assert(MO.isReg() &&
1069  "Operand has register constraints without being a register!");
1070 
1071  unsigned Reg = MO.getReg();
1072  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1073  if (!OpRegCstraints->contains(Reg))
1074  return false;
1075  } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1076  !MRI->constrainRegClass(Reg, OpRegCstraints))
1077  return false;
1078  }
1079 
1080  return true;
1081 }
1082 
1083 /// Return the opcode that does not set flags when possible - otherwise
1084 /// return the original opcode. The caller is responsible to do the actual
1085 /// substitution and legality checking.
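/// (When the flag-setting form writes WZR/XZR, dropping the S can re-encode
/// register 31 as SP and change the semantics, so those cases keep the S form
/// below.)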
1086 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1087  // Don't convert all compare instructions, because for some the zero register
1088  // encoding becomes the sp register.
1089  bool MIDefinesZeroReg = false;
1090  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1091  MIDefinesZeroReg = true;
1092 
1093  switch (MI.getOpcode()) {
1094  default:
1095  return MI.getOpcode();
1096  case AArch64::ADDSWrr:
1097  return AArch64::ADDWrr;
1098  case AArch64::ADDSWri:
1099  return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1100  case AArch64::ADDSWrs:
1101  return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1102  case AArch64::ADDSWrx:
1103  return AArch64::ADDWrx;
1104  case AArch64::ADDSXrr:
1105  return AArch64::ADDXrr;
1106  case AArch64::ADDSXri:
1107  return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1108  case AArch64::ADDSXrs:
1109  return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1110  case AArch64::ADDSXrx:
1111  return AArch64::ADDXrx;
1112  case AArch64::SUBSWrr:
1113  return AArch64::SUBWrr;
1114  case AArch64::SUBSWri:
1115  return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1116  case AArch64::SUBSWrs:
1117  return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1118  case AArch64::SUBSWrx:
1119  return AArch64::SUBWrx;
1120  case AArch64::SUBSXrr:
1121  return AArch64::SUBXrr;
1122  case AArch64::SUBSXri:
1123  return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1124  case AArch64::SUBSXrs:
1125  return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1126  case AArch64::SUBSXrx:
1127  return AArch64::SUBXrx;
1128  }
1129 }
1130 
1131 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1132 
1133 /// True when condition flags are accessed (either by writing or reading)
1134 /// on the instruction trace starting at From and ending at To.
1135 ///
1136 /// Note: If From and To are from different blocks it's assumed CC are accessed
1137 /// on the path.
1138 static bool areCFlagsAccessedBetweenInstrs(
1139  MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1140  const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1141  // Early exit if To is at the beginning of the BB.
1142  if (To == To->getParent()->begin())
1143  return true;
1144 
1145  // Check whether the instructions are in the same basic block
1146  // If not, assume the condition flags might get modified somewhere.
1147  if (To->getParent() != From->getParent())
1148  return true;
1149 
1150  // From must be above To.
1151  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1152  [From](MachineInstr &MI) {
1153  return MI.getIterator() == From;
1154  }) != To->getParent()->rend());
1155 
1156  // We iterate backwards, starting at \p To, until we hit \p From.
1157  for (--To; To != From; --To) {
1158  const MachineInstr &Instr = *To;
1159 
1160  if (((AccessToCheck & AK_Write) &&
1161  Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1162  ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1163  return true;
1164  }
1165  return false;
1166 }
1167 
1168 /// Try to optimize a compare instruction. A compare instruction is an
1169 /// instruction which produces AArch64::NZCV. It is a true compare
1170 /// instruction only when there are no uses of its destination
1171 /// register.
1172 ///
1173 /// The following steps are tried in order:
1174 /// 1. Convert CmpInstr into an unconditional version.
1175 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1176 /// condition code or an instruction which can be converted into such an
1177 /// instruction.
1178 /// Only comparison with zero is supported.
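/// For example, in "add w8, w0, w1; cmp w8, #0; b.ne ...", the ADD can be
/// turned into ADDS and the CMP deleted, provided NZCV is not otherwise live;
/// see substituteCmpToZero below.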
1179 bool AArch64InstrInfo::optimizeCompareInstr(
1180  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1181  int CmpValue, const MachineRegisterInfo *MRI) const {
1182  assert(CmpInstr.getParent());
1183  assert(MRI);
1184 
1185  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1186  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1187  if (DeadNZCVIdx != -1) {
1188  if (CmpInstr.definesRegister(AArch64::WZR) ||
1189  CmpInstr.definesRegister(AArch64::XZR)) {
1190  CmpInstr.eraseFromParent();
1191  return true;
1192  }
1193  unsigned Opc = CmpInstr.getOpcode();
1194  unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1195  if (NewOpc == Opc)
1196  return false;
1197  const MCInstrDesc &MCID = get(NewOpc);
1198  CmpInstr.setDesc(MCID);
1199  CmpInstr.RemoveOperand(DeadNZCVIdx);
1200  bool succeeded = UpdateOperandRegClass(CmpInstr);
1201  (void)succeeded;
1202  assert(succeeded && "Some operands reg class are incompatible!");
1203  return true;
1204  }
1205 
1206  // Continue only if we have an "ri" compare whose immediate is zero.
1207  // FIXME: CmpValue has already been converted to 0 or 1 in the
1208  // analyzeCompare function.
1209  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1210  if (CmpValue != 0 || SrcReg2 != 0)
1211  return false;
1212 
1213  // CmpInstr is a compare instruction if its destination register is not used.
1214  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1215  return false;
1216 
1217  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1218 }
1219 
1220 /// Get the opcode of the S (flag-setting) version of Instr.
1221 /// If Instr is already an S version, its opcode is returned.
1222 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
1223 /// version or we are not interested in it.
1224 static unsigned sForm(MachineInstr &Instr) {
1225  switch (Instr.getOpcode()) {
1226  default:
1227  return AArch64::INSTRUCTION_LIST_END;
1228 
1229  case AArch64::ADDSWrr:
1230  case AArch64::ADDSWri:
1231  case AArch64::ADDSXrr:
1232  case AArch64::ADDSXri:
1233  case AArch64::SUBSWrr:
1234  case AArch64::SUBSWri:
1235  case AArch64::SUBSXrr:
1236  case AArch64::SUBSXri:
1237  return Instr.getOpcode();
1238 
1239  case AArch64::ADDWrr:
1240  return AArch64::ADDSWrr;
1241  case AArch64::ADDWri:
1242  return AArch64::ADDSWri;
1243  case AArch64::ADDXrr:
1244  return AArch64::ADDSXrr;
1245  case AArch64::ADDXri:
1246  return AArch64::ADDSXri;
1247  case AArch64::ADCWr:
1248  return AArch64::ADCSWr;
1249  case AArch64::ADCXr:
1250  return AArch64::ADCSXr;
1251  case AArch64::SUBWrr:
1252  return AArch64::SUBSWrr;
1253  case AArch64::SUBWri:
1254  return AArch64::SUBSWri;
1255  case AArch64::SUBXrr:
1256  return AArch64::SUBSXrr;
1257  case AArch64::SUBXri:
1258  return AArch64::SUBSXri;
1259  case AArch64::SBCWr:
1260  return AArch64::SBCSWr;
1261  case AArch64::SBCXr:
1262  return AArch64::SBCSXr;
1263  case AArch64::ANDWri:
1264  return AArch64::ANDSWri;
1265  case AArch64::ANDXri:
1266  return AArch64::ANDSXri;
1267  }
1268 }
1269 
1270 /// Check if AArch64::NZCV should be alive in successors of MBB.
1271 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1272  for (auto *BB : MBB->successors())
1273  if (BB->isLiveIn(AArch64::NZCV))
1274  return true;
1275  return false;
1276 }
1277 
1278 namespace {
1279 
1280 struct UsedNZCV {
1281  bool N = false;
1282  bool Z = false;
1283  bool C = false;
1284  bool V = false;
1285 
1286  UsedNZCV() = default;
1287 
1288  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1289  this->N |= UsedFlags.N;
1290  this->Z |= UsedFlags.Z;
1291  this->C |= UsedFlags.C;
1292  this->V |= UsedFlags.V;
1293  return *this;
1294  }
1295 };
1296 
1297 } // end anonymous namespace
1298 
1299 /// Find a condition code used by the instruction.
1300 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1301 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1302 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1303  switch (Instr.getOpcode()) {
1304  default:
1305  return AArch64CC::Invalid;
1306 
1307  case AArch64::Bcc: {
1308  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1309  assert(Idx >= 2);
1310  return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1311  }
1312 
1313  case AArch64::CSINVWr:
1314  case AArch64::CSINVXr:
1315  case AArch64::CSINCWr:
1316  case AArch64::CSINCXr:
1317  case AArch64::CSELWr:
1318  case AArch64::CSELXr:
1319  case AArch64::CSNEGWr:
1320  case AArch64::CSNEGXr:
1321  case AArch64::FCSELSrrr:
1322  case AArch64::FCSELDrrr: {
1323  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1324  assert(Idx >= 1);
1325  return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1326  }
1327  }
1328 }
1329 
1330 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1331  assert(CC != AArch64CC::Invalid);
1332  UsedNZCV UsedFlags;
1333  switch (CC) {
1334  default:
1335  break;
1336 
1337  case AArch64CC::EQ: // Z set
1338  case AArch64CC::NE: // Z clear
1339  UsedFlags.Z = true;
1340  break;
1341 
1342  case AArch64CC::HI: // Z clear and C set
1343  case AArch64CC::LS: // Z set or C clear
1344  UsedFlags.Z = true;
1345  LLVM_FALLTHROUGH;
1346  case AArch64CC::HS: // C set
1347  case AArch64CC::LO: // C clear
1348  UsedFlags.C = true;
1349  break;
1350 
1351  case AArch64CC::MI: // N set
1352  case AArch64CC::PL: // N clear
1353  UsedFlags.N = true;
1354  break;
1355 
1356  case AArch64CC::VS: // V set
1357  case AArch64CC::VC: // V clear
1358  UsedFlags.V = true;
1359  break;
1360 
1361  case AArch64CC::GT: // Z clear, N and V the same
1362  case AArch64CC::LE: // Z set, N and V differ
1363  UsedFlags.Z = true;
1364  LLVM_FALLTHROUGH;
1365  case AArch64CC::GE: // N and V the same
1366  case AArch64CC::LT: // N and V differ
1367  UsedFlags.N = true;
1368  UsedFlags.V = true;
1369  break;
1370  }
1371  return UsedFlags;
1372 }
1373 
1374 static bool isADDSRegImm(unsigned Opcode) {
1375  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1376 }
1377 
1378 static bool isSUBSRegImm(unsigned Opcode) {
1379  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1380 }
1381 
1382 /// Check if CmpInstr can be substituted by MI.
1383 ///
1384 /// CmpInstr can be substituted:
1385 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1386 /// - and, MI and CmpInstr are from the same MachineBB
1387 /// - and, condition flags are not alive in successors of the CmpInstr parent
1388 /// - and, if MI opcode is the S form there must be no defs of flags between
1389 /// MI and CmpInstr
1390 /// or if MI opcode is not the S form there must be neither defs of flags
1391 /// nor uses of flags between MI and CmpInstr.
1392 /// - and C/V flags are not used after CmpInstr
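/// (After the S form of the defining operation, N and Z match those of a
/// compare with zero, but C and V generally do not, hence the last
/// restriction.)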
1393 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1394  const TargetRegisterInfo *TRI) {
1395  assert(MI);
1396  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1397  assert(CmpInstr);
1398 
1399  const unsigned CmpOpcode = CmpInstr->getOpcode();
1400  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1401  return false;
1402 
1403  if (MI->getParent() != CmpInstr->getParent())
1404  return false;
1405 
1406  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1407  return false;
1408 
1409  AccessKind AccessToCheck = AK_Write;
1410  if (sForm(*MI) != MI->getOpcode())
1411  AccessToCheck = AK_All;
1412  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1413  return false;
1414 
1415  UsedNZCV NZCVUsedAfterCmp;
1416  for (auto I = std::next(CmpInstr->getIterator()),
1417  E = CmpInstr->getParent()->instr_end();
1418  I != E; ++I) {
1419  const MachineInstr &Instr = *I;
1420  if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1421  AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1422  if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1423  return false;
1424  NZCVUsedAfterCmp |= getUsedNZCV(CC);
1425  }
1426 
1427  if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1428  break;
1429  }
1430 
1431  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1432 }
1433 
1434 /// Substitute an instruction comparing to zero with another instruction
1435 /// which produces needed condition flags.
1436 ///
1437 /// Return true on success.
1438 bool AArch64InstrInfo::substituteCmpToZero(
1439  MachineInstr &CmpInstr, unsigned SrcReg,
1440  const MachineRegisterInfo *MRI) const {
1441  assert(MRI);
1442  // Get the unique definition of SrcReg.
1443  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1444  if (!MI)
1445  return false;
1446 
1447  const TargetRegisterInfo *TRI = &getRegisterInfo();
1448
1449  unsigned NewOpc = sForm(*MI);
1450  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1451  return false;
1452 
1453  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1454  return false;
1455 
1456  // Update the instruction to set NZCV.
1457  MI->setDesc(get(NewOpc));
1458  CmpInstr.eraseFromParent();
1459  bool succeeded = UpdateOperandRegClass(*MI);
1460  (void)succeeded;
1461  assert(succeeded && "Some operands reg class are incompatible!");
1462  MI->addRegisterDefined(AArch64::NZCV, TRI);
1463  return true;
1464 }
1465 
1466 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1467  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1468  MI.getOpcode() != AArch64::CATCHRET)
1469  return false;
1470 
1471  MachineBasicBlock &MBB = *MI.getParent();
1472  DebugLoc DL = MI.getDebugLoc();
1473 
1474  if (MI.getOpcode() == AArch64::CATCHRET) {
1475  // Skip to the first instruction before the epilog.
1476  const TargetInstrInfo *TII =
1477  MBB.getParent()->getSubtarget().getInstrInfo();
1478  MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1479  auto MBBI = MachineBasicBlock::iterator(MI);
1480  MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1481  while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1482  FirstEpilogSEH != MBB.begin())
1483  FirstEpilogSEH = std::prev(FirstEpilogSEH);
1484  if (FirstEpilogSEH != MBB.begin())
1485  FirstEpilogSEH = std::next(FirstEpilogSEH);
1486  BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1487  .addReg(AArch64::X0, RegState::Define)
1488  .addMBB(TargetMBB);
1489  BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1490  .addReg(AArch64::X0, RegState::Define)
1491  .addReg(AArch64::X0)
1492  .addMBB(TargetMBB)
1493  .addImm(0);
1494  return true;
1495  }
1496 
1497  unsigned Reg = MI.getOperand(0).getReg();
1498  const GlobalValue *GV =
1499  cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1500  const TargetMachine &TM = MBB.getParent()->getTarget();
1501  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1502  const unsigned char MO_NC = AArch64II::MO_NC;
1503 
1504  if ((OpFlags & AArch64II::MO_GOT) != 0) {
1505  BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1506  .addGlobalAddress(GV, 0, OpFlags);
1507  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1508  .addReg(Reg, RegState::Kill)
1509  .addImm(0)
1510  .addMemOperand(*MI.memoperands_begin());
1511  } else if (TM.getCodeModel() == CodeModel::Large) {
1512  BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1513  .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1514  .addImm(0);
1515  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1516  .addReg(Reg, RegState::Kill)
1517  .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1518  .addImm(16);
1519  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1520  .addReg(Reg, RegState::Kill)
1521  .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1522  .addImm(32);
1523  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1524  .addReg(Reg, RegState::Kill)
1525  .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1526  .addImm(48);
1527  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1528  .addReg(Reg, RegState::Kill)
1529  .addImm(0)
1530  .addMemOperand(*MI.memoperands_begin());
1531  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1532  BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1533  .addGlobalAddress(GV, 0, OpFlags);
1534  } else {
1535  BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1536  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1537  unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1538  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1539  .addReg(Reg, RegState::Kill)
1540  .addGlobalAddress(GV, 0, LoFlags)
1541  .addMemOperand(*MI.memoperands_begin());
1542  }
1543 
1544  MBB.erase(MI);
1545 
1546  return true;
1547 }
1548 
1549 // Return true if this instruction simply sets its single destination register
1550 // to zero. This is equivalent to a register rename of the zero-register.
1551 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1552  switch (MI.getOpcode()) {
1553  default:
1554  break;
1555  case AArch64::MOVZWi:
1556  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1557  if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1558  assert(MI.getDesc().getNumOperands() == 3 &&
1559  MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1560  return true;
1561  }
1562  break;
1563  case AArch64::ANDWri: // and Rd, Rzr, #imm
1564  return MI.getOperand(1).getReg() == AArch64::WZR;
1565  case AArch64::ANDXri:
1566  return MI.getOperand(1).getReg() == AArch64::XZR;
1567  case TargetOpcode::COPY:
1568  return MI.getOperand(1).getReg() == AArch64::WZR;
1569  }
1570  return false;
1571 }
1572 
1573 // Return true if this instruction simply renames a general register without
1574 // modifying bits.
1575 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1576  switch (MI.getOpcode()) {
1577  default:
1578  break;
1579  case TargetOpcode::COPY: {
1580  // GPR32 copies will be lowered to ORRXrs
1581  unsigned DstReg = MI.getOperand(0).getReg();
1582  return (AArch64::GPR32RegClass.contains(DstReg) ||
1583  AArch64::GPR64RegClass.contains(DstReg));
1584  }
1585  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1586  if (MI.getOperand(1).getReg() == AArch64::XZR) {
1587  assert(MI.getDesc().getNumOperands() == 4 &&
1588  MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1589  return true;
1590  }
1591  break;
1592  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1593  if (MI.getOperand(2).getImm() == 0) {
1594  assert(MI.getDesc().getNumOperands() == 4 &&
1595  MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1596  return true;
1597  }
1598  break;
1599  }
1600  return false;
1601 }
1602 
1603 // Return true if this instruction simply renames an FP register without
1604 // modifying bits.
1605 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1606  switch (MI.getOpcode()) {
1607  default:
1608  break;
1609  case TargetOpcode::COPY: {
1610  // FPR64 copies will be lowered to ORR.16b
1611  unsigned DstReg = MI.getOperand(0).getReg();
1612  return (AArch64::FPR64RegClass.contains(DstReg) ||
1613  AArch64::FPR128RegClass.contains(DstReg));
1614  }
1615  case AArch64::ORRv16i8:
1616  if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1617  assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1618  "invalid ORRv16i8 operands");
1619  return true;
1620  }
1621  break;
1622  }
1623  return false;
1624 }
1625 
1626 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1627  int &FrameIndex) const {
1628  switch (MI.getOpcode()) {
1629  default:
1630  break;
1631  case AArch64::LDRWui:
1632  case AArch64::LDRXui:
1633  case AArch64::LDRBui:
1634  case AArch64::LDRHui:
1635  case AArch64::LDRSui:
1636  case AArch64::LDRDui:
1637  case AArch64::LDRQui:
1638  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1639  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1640  FrameIndex = MI.getOperand(1).getIndex();
1641  return MI.getOperand(0).getReg();
1642  }
1643  break;
1644  }
1645 
1646  return 0;
1647 }
1648 
1649 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1650  int &FrameIndex) const {
1651  switch (MI.getOpcode()) {
1652  default:
1653  break;
1654  case AArch64::STRWui:
1655  case AArch64::STRXui:
1656  case AArch64::STRBui:
1657  case AArch64::STRHui:
1658  case AArch64::STRSui:
1659  case AArch64::STRDui:
1660  case AArch64::STRQui:
1661  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1662  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1663  FrameIndex = MI.getOperand(1).getIndex();
1664  return MI.getOperand(0).getReg();
1665  }
1666  break;
1667  }
1668  return 0;
1669 }
1670 
1671 /// Check all MachineMemOperands for a hint to suppress pairing.
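/// The hint is the target-specific MOSuppressPair MachineMemOperand flag; see
/// the note in isCandidateToMergeOrPair below about where it is set.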
1672 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1673  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1674  return MMO->getFlags() & MOSuppressPair;
1675  });
1676 }
1677 
1678 /// Set a flag on the first MachineMemOperand to suppress pairing.
1679 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1680  if (MI.memoperands_empty())
1681  return;
1682  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1683 }
1684 
1685 /// Check all MachineMemOperands for a hint that the load/store is strided.
1686 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1687  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1688  return MMO->getFlags() & MOStridedAccess;
1689  });
1690 }
1691 
1692 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1693  switch (Opc) {
1694  default:
1695  return false;
1696  case AArch64::STURSi:
1697  case AArch64::STURDi:
1698  case AArch64::STURQi:
1699  case AArch64::STURBBi:
1700  case AArch64::STURHHi:
1701  case AArch64::STURWi:
1702  case AArch64::STURXi:
1703  case AArch64::LDURSi:
1704  case AArch64::LDURDi:
1705  case AArch64::LDURQi:
1706  case AArch64::LDURWi:
1707  case AArch64::LDURXi:
1708  case AArch64::LDURSWi:
1709  case AArch64::LDURHHi:
1710  case AArch64::LDURBBi:
1711  case AArch64::LDURSBWi:
1712  case AArch64::LDURSHWi:
1713  return true;
1714  }
1715 }
1716 
1717 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
1718  switch (Opc) {
1719  default: return {};
1720  case AArch64::PRFMui: return AArch64::PRFUMi;
1721  case AArch64::LDRXui: return AArch64::LDURXi;
1722  case AArch64::LDRWui: return AArch64::LDURWi;
1723  case AArch64::LDRBui: return AArch64::LDURBi;
1724  case AArch64::LDRHui: return AArch64::LDURHi;
1725  case AArch64::LDRSui: return AArch64::LDURSi;
1726  case AArch64::LDRDui: return AArch64::LDURDi;
1727  case AArch64::LDRQui: return AArch64::LDURQi;
1728  case AArch64::LDRBBui: return AArch64::LDURBBi;
1729  case AArch64::LDRHHui: return AArch64::LDURHHi;
1730  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
1731  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
1732  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
1733  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
1734  case AArch64::LDRSWui: return AArch64::LDURSWi;
1735  case AArch64::STRXui: return AArch64::STURXi;
1736  case AArch64::STRWui: return AArch64::STURWi;
1737  case AArch64::STRBui: return AArch64::STURBi;
1738  case AArch64::STRHui: return AArch64::STURHi;
1739  case AArch64::STRSui: return AArch64::STURSi;
1740  case AArch64::STRDui: return AArch64::STURDi;
1741  case AArch64::STRQui: return AArch64::STURQi;
1742  case AArch64::STRBBui: return AArch64::STURBBi;
1743  case AArch64::STRHHui: return AArch64::STURHHi;
1744  }
1745 }
1746 
1747 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
1748  switch (Opc) {
1749  default:
1750  return 2;
1751  case AArch64::LDPXi:
1752  case AArch64::LDPDi:
1753  case AArch64::STPXi:
1754  case AArch64::STPDi:
1755  case AArch64::LDNPXi:
1756  case AArch64::LDNPDi:
1757  case AArch64::STNPXi:
1758  case AArch64::STNPDi:
1759  case AArch64::LDPQi:
1760  case AArch64::STPQi:
1761  case AArch64::LDNPQi:
1762  case AArch64::STNPQi:
1763  case AArch64::LDPWi:
1764  case AArch64::LDPSi:
1765  case AArch64::STPWi:
1766  case AArch64::STPSi:
1767  case AArch64::LDNPWi:
1768  case AArch64::LDNPSi:
1769  case AArch64::STNPWi:
1770  case AArch64::STNPSi:
1771  case AArch64::LDG:
1772  return 3;
1773  case AArch64::ADDG:
1774  case AArch64::STGOffset:
1775  return 2;
1776  }
1777 }
1778 
1779 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1780  switch (MI.getOpcode()) {
1781  default:
1782  return false;
1783  // Scaled instructions.
1784  case AArch64::STRSui:
1785  case AArch64::STRDui:
1786  case AArch64::STRQui:
1787  case AArch64::STRXui:
1788  case AArch64::STRWui:
1789  case AArch64::LDRSui:
1790  case AArch64::LDRDui:
1791  case AArch64::LDRQui:
1792  case AArch64::LDRXui:
1793  case AArch64::LDRWui:
1794  case AArch64::LDRSWui:
1795  // Unscaled instructions.
1796  case AArch64::STURSi:
1797  case AArch64::STURDi:
1798  case AArch64::STURQi:
1799  case AArch64::STURWi:
1800  case AArch64::STURXi:
1801  case AArch64::LDURSi:
1802  case AArch64::LDURDi:
1803  case AArch64::LDURQi:
1804  case AArch64::LDURWi:
1805  case AArch64::LDURXi:
1806  case AArch64::LDURSWi:
1807  return true;
1808  }
1809 }
1810 
1811 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1812  bool &Is64Bit) {
1813  switch (Opc) {
1814  default:
1815  llvm_unreachable("Opcode has no flag setting equivalent!");
1816  // 32-bit cases:
1817  case AArch64::ADDWri:
1818  Is64Bit = false;
1819  return AArch64::ADDSWri;
1820  case AArch64::ADDWrr:
1821  Is64Bit = false;
1822  return AArch64::ADDSWrr;
1823  case AArch64::ADDWrs:
1824  Is64Bit = false;
1825  return AArch64::ADDSWrs;
1826  case AArch64::ADDWrx:
1827  Is64Bit = false;
1828  return AArch64::ADDSWrx;
1829  case AArch64::ANDWri:
1830  Is64Bit = false;
1831  return AArch64::ANDSWri;
1832  case AArch64::ANDWrr:
1833  Is64Bit = false;
1834  return AArch64::ANDSWrr;
1835  case AArch64::ANDWrs:
1836  Is64Bit = false;
1837  return AArch64::ANDSWrs;
1838  case AArch64::BICWrr:
1839  Is64Bit = false;
1840  return AArch64::BICSWrr;
1841  case AArch64::BICWrs:
1842  Is64Bit = false;
1843  return AArch64::BICSWrs;
1844  case AArch64::SUBWri:
1845  Is64Bit = false;
1846  return AArch64::SUBSWri;
1847  case AArch64::SUBWrr:
1848  Is64Bit = false;
1849  return AArch64::SUBSWrr;
1850  case AArch64::SUBWrs:
1851  Is64Bit = false;
1852  return AArch64::SUBSWrs;
1853  case AArch64::SUBWrx:
1854  Is64Bit = false;
1855  return AArch64::SUBSWrx;
1856  // 64-bit cases:
1857  case AArch64::ADDXri:
1858  Is64Bit = true;
1859  return AArch64::ADDSXri;
1860  case AArch64::ADDXrr:
1861  Is64Bit = true;
1862  return AArch64::ADDSXrr;
1863  case AArch64::ADDXrs:
1864  Is64Bit = true;
1865  return AArch64::ADDSXrs;
1866  case AArch64::ADDXrx:
1867  Is64Bit = true;
1868  return AArch64::ADDSXrx;
1869  case AArch64::ANDXri:
1870  Is64Bit = true;
1871  return AArch64::ANDSXri;
1872  case AArch64::ANDXrr:
1873  Is64Bit = true;
1874  return AArch64::ANDSXrr;
1875  case AArch64::ANDXrs:
1876  Is64Bit = true;
1877  return AArch64::ANDSXrs;
1878  case AArch64::BICXrr:
1879  Is64Bit = true;
1880  return AArch64::BICSXrr;
1881  case AArch64::BICXrs:
1882  Is64Bit = true;
1883  return AArch64::BICSXrs;
1884  case AArch64::SUBXri:
1885  Is64Bit = true;
1886  return AArch64::SUBSXri;
1887  case AArch64::SUBXrr:
1888  Is64Bit = true;
1889  return AArch64::SUBSXrr;
1890  case AArch64::SUBXrs:
1891  Is64Bit = true;
1892  return AArch64::SUBSXrs;
1893  case AArch64::SUBXrx:
1894  Is64Bit = true;
1895  return AArch64::SUBSXrx;
1896  }
1897 }
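// For example, ADDWri maps to its flag-setting form ADDSWri with Is64Bit set
// to false, while SUBXrs maps to SUBSXrs with Is64Bit set to true; opcodes
// without a flag-setting twin hit the llvm_unreachable above.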
1898 
1899 // Is this a candidate for ld/st merging or pairing? For example, we don't
1900 // touch volatiles or load/stores that have a hint to avoid pair formation.
1901 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
1902  // If this is a volatile load/store, don't mess with it.
1903  if (MI.hasOrderedMemoryRef())
1904  return false;
1905 
1906  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
1907  assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
1908  "Expected a reg or frame index operand.");
1909  if (!MI.getOperand(2).isImm())
1910  return false;
1911 
1912  // Can't merge/pair if the instruction modifies the base register.
1913  // e.g., ldr x0, [x0]
1914  // This case will never occur with an FI base.
1915  if (MI.getOperand(1).isReg()) {
1916  unsigned BaseReg = MI.getOperand(1).getReg();
1918  if (MI.modifiesRegister(BaseReg, TRI))
1919  return false;
1920  }
1921 
1922  // Check if this load/store has a hint to avoid pair formation.
1923  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1924  if (isLdStPairSuppressed(MI))
1925  return false;
1926 
1927  // On some CPUs quad load/store pairs are slower than two single load/stores.
1928  if (Subtarget.isPaired128Slow()) {
1929  switch (MI.getOpcode()) {
1930  default:
1931  break;
1932  case AArch64::LDURQi:
1933  case AArch64::STURQi:
1934  case AArch64::LDRQui:
1935  case AArch64::STRQui:
1936  return false;
1937  }
1938  }
1939 
1940  return true;
1941 }
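// For example, "ldr x0, [x0, #8]" is not a candidate because the load
// overwrites its own base register, and LDRQui/STRQui are rejected when the
// subtarget reports that 128-bit load/store pairs are slow.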
1942 
1943 bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
1944  const MachineOperand *&BaseOp,
1945  int64_t &Offset,
1946  const TargetRegisterInfo *TRI) const {
1947  unsigned Width;
1948  return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
1949 }
1950 
1951 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
1952  const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
1953  unsigned &Width, const TargetRegisterInfo *TRI) const {
1954  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1955  // Handle only loads/stores with base register followed by immediate offset.
1956  if (LdSt.getNumExplicitOperands() == 3) {
1957  // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1958  if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
1959  !LdSt.getOperand(2).isImm())
1960  return false;
1961  } else if (LdSt.getNumExplicitOperands() == 4) {
1962  // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1963  if (!LdSt.getOperand(1).isReg() ||
1964  (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
1965  !LdSt.getOperand(3).isImm())
1966  return false;
1967  } else
1968  return false;
1969 
1970  // Get the scaling factor for the instruction and set the width for the
1971  // instruction.
1972  unsigned Scale = 0;
1973  int64_t Dummy1, Dummy2;
1974 
1975  // If this returns false, then it's an instruction we don't want to handle.
1976  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
1977  return false;
1978 
1979  // Compute the offset. Offset is calculated as the immediate operand
1980  // multiplied by the scaling factor. Unscaled instructions have scaling factor
1981  // set to 1.
1982  if (LdSt.getNumExplicitOperands() == 3) {
1983  BaseOp = &LdSt.getOperand(1);
1984  Offset = LdSt.getOperand(2).getImm() * Scale;
1985  } else {
1986  assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1987  BaseOp = &LdSt.getOperand(2);
1988  Offset = LdSt.getOperand(3).getImm() * Scale;
1989  }
1990 
1991  assert((BaseOp->isReg() || BaseOp->isFI()) &&
1992  "getMemOperandWithOffset only supports base "
1993  "operands of type register or frame index.");
1994 
1995  return true;
1996 }
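// A worked example: for "ldr x1, [x0, #16]" (LDRXui) the immediate operand
// holds the scaled value 2, getMemOpInfo reports Scale = 8 and Width = 8, so
// BaseOp refers to x0 and Offset comes out as 2 * 8 = 16 bytes.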
1997 
2000  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2001  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2002  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2003  return OfsOp;
2004 }
2005 
2006 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
2007  unsigned &Width, int64_t &MinOffset,
2008  int64_t &MaxOffset) {
2009  switch (Opcode) {
2010  // Not a memory operation or something we want to handle.
2011  default:
2012  Scale = Width = 0;
2013  MinOffset = MaxOffset = 0;
2014  return false;
2015  case AArch64::STRWpost:
2016  case AArch64::LDRWpost:
2017  Width = 32;
2018  Scale = 4;
2019  MinOffset = -256;
2020  MaxOffset = 255;
2021  break;
2022  case AArch64::LDURQi:
2023  case AArch64::STURQi:
2024  Width = 16;
2025  Scale = 1;
2026  MinOffset = -256;
2027  MaxOffset = 255;
2028  break;
2029  case AArch64::PRFUMi:
2030  case AArch64::LDURXi:
2031  case AArch64::LDURDi:
2032  case AArch64::STURXi:
2033  case AArch64::STURDi:
2034  Width = 8;
2035  Scale = 1;
2036  MinOffset = -256;
2037  MaxOffset = 255;
2038  break;
2039  case AArch64::LDURWi:
2040  case AArch64::LDURSi:
2041  case AArch64::LDURSWi:
2042  case AArch64::STURWi:
2043  case AArch64::STURSi:
2044  Width = 4;
2045  Scale = 1;
2046  MinOffset = -256;
2047  MaxOffset = 255;
2048  break;
2049  case AArch64::LDURHi:
2050  case AArch64::LDURHHi:
2051  case AArch64::LDURSHXi:
2052  case AArch64::LDURSHWi:
2053  case AArch64::STURHi:
2054  case AArch64::STURHHi:
2055  Width = 2;
2056  Scale = 1;
2057  MinOffset = -256;
2058  MaxOffset = 255;
2059  break;
2060  case AArch64::LDURBi:
2061  case AArch64::LDURBBi:
2062  case AArch64::LDURSBXi:
2063  case AArch64::LDURSBWi:
2064  case AArch64::STURBi:
2065  case AArch64::STURBBi:
2066  Width = 1;
2067  Scale = 1;
2068  MinOffset = -256;
2069  MaxOffset = 255;
2070  break;
2071  case AArch64::LDPQi:
2072  case AArch64::LDNPQi:
2073  case AArch64::STPQi:
2074  case AArch64::STNPQi:
2075  Scale = 16;
2076  Width = 32;
2077  MinOffset = -64;
2078  MaxOffset = 63;
2079  break;
2080  case AArch64::LDRQui:
2081  case AArch64::STRQui:
2082  Scale = Width = 16;
2083  MinOffset = 0;
2084  MaxOffset = 4095;
2085  break;
2086  case AArch64::LDPXi:
2087  case AArch64::LDPDi:
2088  case AArch64::LDNPXi:
2089  case AArch64::LDNPDi:
2090  case AArch64::STPXi:
2091  case AArch64::STPDi:
2092  case AArch64::STNPXi:
2093  case AArch64::STNPDi:
2094  Scale = 8;
2095  Width = 16;
2096  MinOffset = -64;
2097  MaxOffset = 63;
2098  break;
2099  case AArch64::PRFMui:
2100  case AArch64::LDRXui:
2101  case AArch64::LDRDui:
2102  case AArch64::STRXui:
2103  case AArch64::STRDui:
2104  Scale = Width = 8;
2105  MinOffset = 0;
2106  MaxOffset = 4095;
2107  break;
2108  case AArch64::LDPWi:
2109  case AArch64::LDPSi:
2110  case AArch64::LDNPWi:
2111  case AArch64::LDNPSi:
2112  case AArch64::STPWi:
2113  case AArch64::STPSi:
2114  case AArch64::STNPWi:
2115  case AArch64::STNPSi:
2116  Scale = 4;
2117  Width = 8;
2118  MinOffset = -64;
2119  MaxOffset = 63;
2120  break;
2121  case AArch64::LDRWui:
2122  case AArch64::LDRSui:
2123  case AArch64::LDRSWui:
2124  case AArch64::STRWui:
2125  case AArch64::STRSui:
2126  Scale = Width = 4;
2127  MinOffset = 0;
2128  MaxOffset = 4095;
2129  break;
2130  case AArch64::LDRHui:
2131  case AArch64::LDRHHui:
2132  case AArch64::LDRSHWui:
2133  case AArch64::LDRSHXui:
2134  case AArch64::STRHui:
2135  case AArch64::STRHHui:
2136  Scale = Width = 2;
2137  MinOffset = 0;
2138  MaxOffset = 4095;
2139  break;
2140  case AArch64::LDRBui:
2141  case AArch64::LDRBBui:
2142  case AArch64::LDRSBWui:
2143  case AArch64::LDRSBXui:
2144  case AArch64::STRBui:
2145  case AArch64::STRBBui:
2146  Scale = Width = 1;
2147  MinOffset = 0;
2148  MaxOffset = 4095;
2149  break;
2150  case AArch64::ADDG:
2151  Scale = 16;
2152  Width = 0;
2153  MinOffset = 0;
2154  MaxOffset = 63;
2155  break;
2156  case AArch64::LDG:
2157  case AArch64::STGOffset:
2158  Scale = Width = 16;
2159  MinOffset = -256;
2160  MaxOffset = 255;
2161  break;
2162  }
2163 
2164  return true;
2165 }
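// For example, STPXi (store pair of X registers) has Scale = 8 and Width = 16,
// and its signed 7-bit element offset range [-64, 63] covers byte offsets
// from -512 to +504 in steps of 8.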
2166 
2167 static unsigned getOffsetStride(unsigned Opc) {
2168  switch (Opc) {
2169  default:
2170  return 0;
2171  case AArch64::LDURQi:
2172  case AArch64::STURQi:
2173  return 16;
2174  case AArch64::LDURXi:
2175  case AArch64::LDURDi:
2176  case AArch64::STURXi:
2177  case AArch64::STURDi:
2178  return 8;
2179  case AArch64::LDURWi:
2180  case AArch64::LDURSi:
2181  case AArch64::LDURSWi:
2182  case AArch64::STURWi:
2183  case AArch64::STURSi:
2184  return 4;
2185  }
2186 }
2187 
2188 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
2189 // scaled.
2190 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2191  unsigned OffsetStride = getOffsetStride(Opc);
2192  if (OffsetStride == 0)
2193  return false;
2194  // If the byte-offset isn't a multiple of the stride, we can't scale this
2195  // offset.
2196  if (Offset % OffsetStride != 0)
2197  return false;
2198 
2199  // Convert the byte-offset used by unscaled load/stores into an "element"
2200  // offset used by the scaled pair load/store instructions.
2201  Offset /= OffsetStride;
2202  return true;
2203 }
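// For example, with Opc = AArch64::LDURXi (stride 8) a byte offset of 16
// scales to an element offset of 2, while a byte offset of 12 cannot be
// scaled because it is not a multiple of the stride.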
2204 
2205 // Unscale the scaled offsets. Returns false if the scaled offset can't be
2206 // unscaled.
2207 static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
2208  unsigned OffsetStride = getOffsetStride(Opc);
2209  if (OffsetStride == 0)
2210  return false;
2211 
2212  // Convert the "element" offset used by the scaled pair load/store
2213  // instructions into the byte-offset used by unscaled load/stores.
2214  Offset *= OffsetStride;
2215  return true;
2216 }
2217 
2218 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2219  if (FirstOpc == SecondOpc)
2220  return true;
2221  // We can also pair sign-ext and zero-ext instructions.
2222  switch (FirstOpc) {
2223  default:
2224  return false;
2225  case AArch64::LDRWui:
2226  case AArch64::LDURWi:
2227  return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2228  case AArch64::LDRSWui:
2229  case AArch64::LDURSWi:
2230  return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2231  }
2232  // These instructions can't be paired based on their opcodes.
2233  return false;
2234 }
2235 
2236 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2237  int64_t Offset1, unsigned Opcode1, int FI2,
2238  int64_t Offset2, unsigned Opcode2) {
2239  // Accesses through fixed stack object frame indices may access a different
2240  // fixed stack slot. Check that the object offsets + offsets match.
2241  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2242  int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2243  int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2244  assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2245  // Get the byte-offset from the object offset.
2246  if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
2247  return false;
2248  ObjectOffset1 += Offset1;
2249  ObjectOffset2 += Offset2;
2250  // Get the "element" index in the object.
2251  if (!scaleOffset(Opcode1, ObjectOffset1) ||
2252  !scaleOffset(Opcode2, ObjectOffset2))
2253  return false;
2254  return ObjectOffset1 + 1 == ObjectOffset2;
2255  }
2256 
2257  return FI1 == FI2;
2258 }
2259 
2260 /// Detect opportunities for ldp/stp formation.
2261 ///
2262 /// Only called for LdSt for which getMemOperandWithOffset returns true.
2264  const MachineOperand &BaseOp2,
2265  unsigned NumLoads) const {
2266  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2267  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2268  if (BaseOp1.getType() != BaseOp2.getType())
2269  return false;
2270 
2271  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2272  "Only base registers and frame indices are supported.");
2273 
2274  // Check for both base regs and base FI.
2275  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2276  return false;
2277 
2278  // Only cluster up to a single pair.
2279  if (NumLoads > 1)
2280  return false;
2281 
2282  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2283  return false;
2284 
2285  // Can we pair these instructions based on their opcodes?
2286  unsigned FirstOpc = FirstLdSt.getOpcode();
2287  unsigned SecondOpc = SecondLdSt.getOpcode();
2288  if (!canPairLdStOpc(FirstOpc, SecondOpc))
2289  return false;
2290 
2291  // Can't merge volatiles or load/stores that have a hint to avoid pair
2292  // formation, for example.
2293  if (!isCandidateToMergeOrPair(FirstLdSt) ||
2294  !isCandidateToMergeOrPair(SecondLdSt))
2295  return false;
2296 
2297  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2298  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2299  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2300  return false;
2301 
2302  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2303  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2304  return false;
2305 
2306  // Pairwise instructions have a 7-bit signed offset field.
2307  if (Offset1 > 63 || Offset1 < -64)
2308  return false;
2309 
2310  // The caller should already have ordered First/SecondLdSt by offset.
2311  // Note: except for non-equal frame index bases
2312  if (BaseOp1.isFI()) {
2313  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
2314  "Caller should have ordered offsets.");
2315 
2316  const MachineFrameInfo &MFI =
2317  FirstLdSt.getParent()->getParent()->getFrameInfo();
2318  return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2319  BaseOp2.getIndex(), Offset2, SecondOpc);
2320  }
2321 
2322  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2323  "Caller should have ordered offsets.");
2324 
2325  return Offset1 + 1 == Offset2;
2326 }
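// For example, "ldr x1, [x10, #8]" followed by "ldr x2, [x10, #16]" has
// element offsets 1 and 2, so the two loads are clustered and may later be
// rewritten by the load/store optimizer as "ldp x1, x2, [x10, #8]".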
2327 
2328 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2329  unsigned Reg, unsigned SubIdx,
2330  unsigned State,
2331  const TargetRegisterInfo *TRI) {
2332  if (!SubIdx)
2333  return MIB.addReg(Reg, State);
2334 
2335  if (TargetRegisterInfo::isPhysicalRegister(Reg))
2336  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2337  return MIB.addReg(Reg, State, SubIdx);
2338 }
2339 
2340 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2341  unsigned NumRegs) {
2342  // We really want the positive remainder mod 32 here, which happens to be
2343  // easily obtainable with a mask.
2344  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2345 }
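// For example, with DestReg encoding 1, SrcReg encoding 0 and NumRegs 2,
// (1 - 0) & 0x1f == 1 < 2, so a forward sub-register copy would overwrite
// part of the source and the tuple copy below iterates in reverse instead.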
2346 
2347 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2348  MachineBasicBlock::iterator I,
2349  const DebugLoc &DL, unsigned DestReg,
2350  unsigned SrcReg, bool KillSrc,
2351  unsigned Opcode,
2352  ArrayRef<unsigned> Indices) const {
2353  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2355  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2356  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2357  unsigned NumRegs = Indices.size();
2358 
2359  int SubReg = 0, End = NumRegs, Incr = 1;
2360  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2361  SubReg = NumRegs - 1;
2362  End = -1;
2363  Incr = -1;
2364  }
2365 
2366  for (; SubReg != End; SubReg += Incr) {
2367  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2368  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2369  AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2370  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2371  }
2372 }
2373 
2374 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2375  MachineBasicBlock::iterator I,
2376  DebugLoc DL, unsigned DestReg,
2377  unsigned SrcReg, bool KillSrc,
2378  unsigned Opcode, unsigned ZeroReg,
2379  llvm::ArrayRef<unsigned> Indices) const {
2381  unsigned NumRegs = Indices.size();
2382 
2383 #ifndef NDEBUG
2384  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2385  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2386  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2387  "GPR reg sequences should not be able to overlap");
2388 #endif
2389 
2390  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2391  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2392  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2393  MIB.addReg(ZeroReg);
2394  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2395  MIB.addImm(0);
2396  }
2397 }
2398 
2399 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2400  MachineBasicBlock::iterator I,
2401  const DebugLoc &DL, unsigned DestReg,
2402  unsigned SrcReg, bool KillSrc) const {
2403  if (AArch64::GPR32spRegClass.contains(DestReg) &&
2404  (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2406 
2407  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2408  // If either operand is WSP, expand to ADD #0.
2409  if (Subtarget.hasZeroCycleRegMove()) {
2410  // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2411  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2412  &AArch64::GPR64spRegClass);
2413  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2414  &AArch64::GPR64spRegClass);
2415  // This instruction is reading and writing X registers. This may upset
2416  // the register scavenger and machine verifier, so we need to indicate
2417  // that we are reading an undefined value from SrcRegX, but a proper
2418  // value from SrcReg.
2419  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2420  .addReg(SrcRegX, RegState::Undef)
2421  .addImm(0)
2423  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2424  } else {
2425  BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2426  .addReg(SrcReg, getKillRegState(KillSrc))
2427  .addImm(0)
2429  }
2430  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2431  BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2432  .addImm(0)
2434  } else {
2435  if (Subtarget.hasZeroCycleRegMove()) {
2436  // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2437  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2438  &AArch64::GPR64spRegClass);
2439  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2440  &AArch64::GPR64spRegClass);
2441  // This instruction is reading and writing X registers. This may upset
2442  // the register scavenger and machine verifier, so we need to indicate
2443  // that we are reading an undefined value from SrcRegX, but a proper
2444  // value from SrcReg.
2445  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2446  .addReg(AArch64::XZR)
2447  .addReg(SrcRegX, RegState::Undef)
2448  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2449  } else {
2450  // Otherwise, expand to ORR WZR.
2451  BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2452  .addReg(AArch64::WZR)
2453  .addReg(SrcReg, getKillRegState(KillSrc));
2454  }
2455  }
2456  return;
2457  }
2458 
2459  if (AArch64::GPR64spRegClass.contains(DestReg) &&
2460  (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2461  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2462  // If either operand is SP, expand to ADD #0.
2463  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2464  .addReg(SrcReg, getKillRegState(KillSrc))
2465  .addImm(0)
2467  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2468  BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2469  .addImm(0)
2471  } else {
2472  // Otherwise, expand to ORR XZR.
2473  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2474  .addReg(AArch64::XZR)
2475  .addReg(SrcReg, getKillRegState(KillSrc));
2476  }
2477  return;
2478  }
2479 
2480  // Copy a DDDD register quad by copying the individual sub-registers.
2481  if (AArch64::DDDDRegClass.contains(DestReg) &&
2482  AArch64::DDDDRegClass.contains(SrcReg)) {
2483  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2484  AArch64::dsub2, AArch64::dsub3};
2485  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2486  Indices);
2487  return;
2488  }
2489 
2490  // Copy a DDD register triple by copying the individual sub-registers.
2491  if (AArch64::DDDRegClass.contains(DestReg) &&
2492  AArch64::DDDRegClass.contains(SrcReg)) {
2493  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2494  AArch64::dsub2};
2495  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2496  Indices);
2497  return;
2498  }
2499 
2500  // Copy a DD register pair by copying the individual sub-registers.
2501  if (AArch64::DDRegClass.contains(DestReg) &&
2502  AArch64::DDRegClass.contains(SrcReg)) {
2503  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2504  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2505  Indices);
2506  return;
2507  }
2508 
2509  // Copy a QQQQ register quad by copying the individual sub-registers.
2510  if (AArch64::QQQQRegClass.contains(DestReg) &&
2511  AArch64::QQQQRegClass.contains(SrcReg)) {
2512  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2513  AArch64::qsub2, AArch64::qsub3};
2514  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2515  Indices);
2516  return;
2517  }
2518 
2519  // Copy a QQQ register triple by copying the individual sub-registers.
2520  if (AArch64::QQQRegClass.contains(DestReg) &&
2521  AArch64::QQQRegClass.contains(SrcReg)) {
2522  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2523  AArch64::qsub2};
2524  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2525  Indices);
2526  return;
2527  }
2528 
2529  // Copy a QQ register pair by copying the individual sub-registers.
2530  if (AArch64::QQRegClass.contains(DestReg) &&
2531  AArch64::QQRegClass.contains(SrcReg)) {
2532  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2533  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2534  Indices);
2535  return;
2536  }
2537 
2538  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2539  AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2540  static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2541  copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2542  AArch64::XZR, Indices);
2543  return;
2544  }
2545 
2546  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2547  AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2548  static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2549  copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2550  AArch64::WZR, Indices);
2551  return;
2552  }
2553 
2554  if (AArch64::FPR128RegClass.contains(DestReg) &&
2555  AArch64::FPR128RegClass.contains(SrcReg)) {
2556  if (Subtarget.hasNEON()) {
2557  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2558  .addReg(SrcReg)
2559  .addReg(SrcReg, getKillRegState(KillSrc));
2560  } else {
2561  BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2562  .addReg(AArch64::SP, RegState::Define)
2563  .addReg(SrcReg, getKillRegState(KillSrc))
2564  .addReg(AArch64::SP)
2565  .addImm(-16);
2566  BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2567  .addReg(AArch64::SP, RegState::Define)
2568  .addReg(DestReg, RegState::Define)
2569  .addReg(AArch64::SP)
2570  .addImm(16);
2571  }
2572  return;
2573  }
2574 
2575  if (AArch64::FPR64RegClass.contains(DestReg) &&
2576  AArch64::FPR64RegClass.contains(SrcReg)) {
2577  if (Subtarget.hasNEON()) {
2578  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2579  &AArch64::FPR128RegClass);
2580  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2581  &AArch64::FPR128RegClass);
2582  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2583  .addReg(SrcReg)
2584  .addReg(SrcReg, getKillRegState(KillSrc));
2585  } else {
2586  BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2587  .addReg(SrcReg, getKillRegState(KillSrc));
2588  }
2589  return;
2590  }
2591 
2592  if (AArch64::FPR32RegClass.contains(DestReg) &&
2593  AArch64::FPR32RegClass.contains(SrcReg)) {
2594  if (Subtarget.hasNEON()) {
2595  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2596  &AArch64::FPR128RegClass);
2597  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2598  &AArch64::FPR128RegClass);
2599  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2600  .addReg(SrcReg)
2601  .addReg(SrcReg, getKillRegState(KillSrc));
2602  } else {
2603  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2604  .addReg(SrcReg, getKillRegState(KillSrc));
2605  }
2606  return;
2607  }
2608 
2609  if (AArch64::FPR16RegClass.contains(DestReg) &&
2610  AArch64::FPR16RegClass.contains(SrcReg)) {
2611  if (Subtarget.hasNEON()) {
2612  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2613  &AArch64::FPR128RegClass);
2614  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2615  &AArch64::FPR128RegClass);
2616  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2617  .addReg(SrcReg)
2618  .addReg(SrcReg, getKillRegState(KillSrc));
2619  } else {
2620  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2621  &AArch64::FPR32RegClass);
2622  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2623  &AArch64::FPR32RegClass);
2624  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2625  .addReg(SrcReg, getKillRegState(KillSrc));
2626  }
2627  return;
2628  }
2629 
2630  if (AArch64::FPR8RegClass.contains(DestReg) &&
2631  AArch64::FPR8RegClass.contains(SrcReg)) {
2632  if (Subtarget.hasNEON()) {
2633  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2634  &AArch64::FPR128RegClass);
2635  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2636  &AArch64::FPR128RegClass);
2637  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2638  .addReg(SrcReg)
2639  .addReg(SrcReg, getKillRegState(KillSrc));
2640  } else {
2641  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2642  &AArch64::FPR32RegClass);
2643  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2644  &AArch64::FPR32RegClass);
2645  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2646  .addReg(SrcReg, getKillRegState(KillSrc));
2647  }
2648  return;
2649  }
2650 
2651  // Copies between GPR64 and FPR64.
2652  if (AArch64::FPR64RegClass.contains(DestReg) &&
2653  AArch64::GPR64RegClass.contains(SrcReg)) {
2654  BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2655  .addReg(SrcReg, getKillRegState(KillSrc));
2656  return;
2657  }
2658  if (AArch64::GPR64RegClass.contains(DestReg) &&
2659  AArch64::FPR64RegClass.contains(SrcReg)) {
2660  BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2661  .addReg(SrcReg, getKillRegState(KillSrc));
2662  return;
2663  }
2664  // Copies between GPR32 and FPR32.
2665  if (AArch64::FPR32RegClass.contains(DestReg) &&
2666  AArch64::GPR32RegClass.contains(SrcReg)) {
2667  BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2668  .addReg(SrcReg, getKillRegState(KillSrc));
2669  return;
2670  }
2671  if (AArch64::GPR32RegClass.contains(DestReg) &&
2672  AArch64::FPR32RegClass.contains(SrcReg)) {
2673  BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2674  .addReg(SrcReg, getKillRegState(KillSrc));
2675  return;
2676  }
2677 
2678  if (DestReg == AArch64::NZCV) {
2679  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2680  BuildMI(MBB, I, DL, get(AArch64::MSR))
2681  .addImm(AArch64SysReg::NZCV)
2682  .addReg(SrcReg, getKillRegState(KillSrc))
2683  .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2684  return;
2685  }
2686 
2687  if (SrcReg == AArch64::NZCV) {
2688  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2689  BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2690  .addImm(AArch64SysReg::NZCV)
2691  .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2692  return;
2693  }
2694 
2695  llvm_unreachable("unimplemented reg-to-reg copy");
2696 }
2697 
2698 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2699  MachineBasicBlock &MBB,
2700  MachineBasicBlock::iterator InsertBefore,
2701  const MCInstrDesc &MCID,
2702  unsigned SrcReg, bool IsKill,
2703  unsigned SubIdx0, unsigned SubIdx1, int FI,
2704  MachineMemOperand *MMO) {
2705  unsigned SrcReg0 = SrcReg;
2706  unsigned SrcReg1 = SrcReg;
2708  SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2709  SubIdx0 = 0;
2710  SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2711  SubIdx1 = 0;
2712  }
2713  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2714  .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
2715  .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
2716  .addFrameIndex(FI)
2717  .addImm(0)
2718  .addMemOperand(MMO);
2719 }
2720 
2721 void AArch64InstrInfo::storeRegToStackSlot(
2722  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2723  bool isKill, int FI, const TargetRegisterClass *RC,
2724  const TargetRegisterInfo *TRI) const {
2725  MachineFunction &MF = *MBB.getParent();
2726  MachineFrameInfo &MFI = MF.getFrameInfo();
2727  unsigned Align = MFI.getObjectAlignment(FI);
2728 
2731  PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2732  unsigned Opc = 0;
2733  bool Offset = true;
2734  switch (TRI->getSpillSize(*RC)) {
2735  case 1:
2736  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2737  Opc = AArch64::STRBui;
2738  break;
2739  case 2:
2740  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2741  Opc = AArch64::STRHui;
2742  break;
2743  case 4:
2744  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2745  Opc = AArch64::STRWui;
2747  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2748  else
2749  assert(SrcReg != AArch64::WSP);
2750  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2751  Opc = AArch64::STRSui;
2752  break;
2753  case 8:
2754  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2755  Opc = AArch64::STRXui;
2757  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2758  else
2759  assert(SrcReg != AArch64::SP);
2760  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2761  Opc = AArch64::STRDui;
2762  } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2764  get(AArch64::STPWi), SrcReg, isKill,
2765  AArch64::sube32, AArch64::subo32, FI, MMO);
2766  return;
2767  }
2768  break;
2769  case 16:
2770  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2771  Opc = AArch64::STRQui;
2772  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2773  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2774  Opc = AArch64::ST1Twov1d;
2775  Offset = false;
2776  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2778  get(AArch64::STPXi), SrcReg, isKill,
2779  AArch64::sube64, AArch64::subo64, FI, MMO);
2780  return;
2781  }
2782  break;
2783  case 24:
2784  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2785  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2786  Opc = AArch64::ST1Threev1d;
2787  Offset = false;
2788  }
2789  break;
2790  case 32:
2791  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2792  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2793  Opc = AArch64::ST1Fourv1d;
2794  Offset = false;
2795  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2796  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2797  Opc = AArch64::ST1Twov2d;
2798  Offset = false;
2799  }
2800  break;
2801  case 48:
2802  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2803  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2804  Opc = AArch64::ST1Threev2d;
2805  Offset = false;
2806  }
2807  break;
2808  case 64:
2809  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2810  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2811  Opc = AArch64::ST1Fourv2d;
2812  Offset = false;
2813  }
2814  break;
2815  }
2816  assert(Opc && "Unknown register class");
2817 
2818  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2819  .addReg(SrcReg, getKillRegState(isKill))
2820  .addFrameIndex(FI);
2821 
2822  if (Offset)
2823  MI.addImm(0);
2824  MI.addMemOperand(MMO);
2825 }
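// For example, spilling a 16-byte FPR128 register uses STRQui with the frame
// index and an immediate of 0, while spilling a 32-byte QQ tuple falls back
// to ST1Twov2d, which takes no immediate offset (Offset is cleared above).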
2826 
2827 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
2828  MachineBasicBlock &MBB,
2829  MachineBasicBlock::iterator InsertBefore,
2830  const MCInstrDesc &MCID,
2831  unsigned DestReg, unsigned SubIdx0,
2832  unsigned SubIdx1, int FI,
2833  MachineMemOperand *MMO) {
2834  unsigned DestReg0 = DestReg;
2835  unsigned DestReg1 = DestReg;
2836  bool IsUndef = true;
2838  DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
2839  SubIdx0 = 0;
2840  DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
2841  SubIdx1 = 0;
2842  IsUndef = false;
2843  }
2844  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2845  .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
2846  .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
2847  .addFrameIndex(FI)
2848  .addImm(0)
2849  .addMemOperand(MMO);
2850 }
2851 
2852 void AArch64InstrInfo::loadRegFromStackSlot(
2853  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2854  int FI, const TargetRegisterClass *RC,
2855  const TargetRegisterInfo *TRI) const {
2856  MachineFunction &MF = *MBB.getParent();
2857  MachineFrameInfo &MFI = MF.getFrameInfo();
2858  unsigned Align = MFI.getObjectAlignment(FI);
2861  PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2862 
2863  unsigned Opc = 0;
2864  bool Offset = true;
2865  switch (TRI->getSpillSize(*RC)) {
2866  case 1:
2867  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2868  Opc = AArch64::LDRBui;
2869  break;
2870  case 2:
2871  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2872  Opc = AArch64::LDRHui;
2873  break;
2874  case 4:
2875  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2876  Opc = AArch64::LDRWui;
2878  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2879  else
2880  assert(DestReg != AArch64::WSP);
2881  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2882  Opc = AArch64::LDRSui;
2883  break;
2884  case 8:
2885  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2886  Opc = AArch64::LDRXui;
2888  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2889  else
2890  assert(DestReg != AArch64::SP);
2891  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2892  Opc = AArch64::LDRDui;
2893  } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2895  get(AArch64::LDPWi), DestReg, AArch64::sube32,
2896  AArch64::subo32, FI, MMO);
2897  return;
2898  }
2899  break;
2900  case 16:
2901  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2902  Opc = AArch64::LDRQui;
2903  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2904  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2905  Opc = AArch64::LD1Twov1d;
2906  Offset = false;
2907  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2909  get(AArch64::LDPXi), DestReg, AArch64::sube64,
2910  AArch64::subo64, FI, MMO);
2911  return;
2912  }
2913  break;
2914  case 24:
2915  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2916  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2917  Opc = AArch64::LD1Threev1d;
2918  Offset = false;
2919  }
2920  break;
2921  case 32:
2922  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2923  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2924  Opc = AArch64::LD1Fourv1d;
2925  Offset = false;
2926  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2927  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2928  Opc = AArch64::LD1Twov2d;
2929  Offset = false;
2930  }
2931  break;
2932  case 48:
2933  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2934  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2935  Opc = AArch64::LD1Threev2d;
2936  Offset = false;
2937  }
2938  break;
2939  case 64:
2940  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2941  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2942  Opc = AArch64::LD1Fourv2d;
2943  Offset = false;
2944  }
2945  break;
2946  }
2947  assert(Opc && "Unknown register class");
2948 
2949  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2950  .addReg(DestReg, getDefRegState(true))
2951  .addFrameIndex(FI);
2952  if (Offset)
2953  MI.addImm(0);
2954  MI.addMemOperand(MMO);
2955 }
2956 
2957 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2958  MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2959  unsigned DestReg, unsigned SrcReg, int Offset,
2960  const TargetInstrInfo *TII,
2961  MachineInstr::MIFlag Flag, bool SetNZCV,
2962  bool NeedsWinCFI, bool *HasWinCFI) {
2963  if (DestReg == SrcReg && Offset == 0)
2964  return;
2965 
2966  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2967  "SP increment/decrement not 16-byte aligned");
2968 
2969  bool isSub = Offset < 0;
2970  if (isSub)
2971  Offset = -Offset;
2972 
2973  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2974  // scratch register. If DestReg is a virtual register, use it as the
2975  // scratch register; otherwise, create a new virtual register (to be
2976  // replaced by the scavenger at the end of PEI). That case can be optimized
2977  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2978  // register can be loaded with offset%8 and the add/sub can use an extending
2979  // instruction with LSL#3.
2980  // Currently the function handles any offsets but generates a poor sequence
2981  // of code.
2982  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2983 
2984  unsigned Opc;
2985  if (SetNZCV)
2986  Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2987  else
2988  Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2989  const unsigned MaxEncoding = 0xfff;
2990  const unsigned ShiftSize = 12;
2991  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2992  while (((unsigned)Offset) >= (1 << ShiftSize)) {
2993  unsigned ThisVal;
2994  if (((unsigned)Offset) > MaxEncodableValue) {
2995  ThisVal = MaxEncodableValue;
2996  } else {
2997  ThisVal = Offset & MaxEncodableValue;
2998  }
2999  assert((ThisVal >> ShiftSize) <= MaxEncoding &&
3000  "Encoding cannot handle value that big");
3001  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3002  .addReg(SrcReg)
3003  .addImm(ThisVal >> ShiftSize)
3005  .setMIFlag(Flag);
3006 
3007  if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP) {
3008  if (HasWinCFI)
3009  *HasWinCFI = true;
3010  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
3011  .addImm(ThisVal)
3012  .setMIFlag(Flag);
3013  }
3014 
3015  SrcReg = DestReg;
3016  Offset -= ThisVal;
3017  if (Offset == 0)
3018  return;
3019  }
3020  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3021  .addReg(SrcReg)
3022  .addImm(Offset)
3024  .setMIFlag(Flag);
3025 
3026  if (NeedsWinCFI) {
3027  if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
3028  (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
3029  if (HasWinCFI)
3030  *HasWinCFI = true;
3031  if (Offset == 0)
3032  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
3033  setMIFlag(Flag);
3034  else
3035  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
3036  addImm(Offset).setMIFlag(Flag);
3037  } else if (DestReg == AArch64::SP) {
3038  if (HasWinCFI)
3039  *HasWinCFI = true;
3040  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
3041  addImm(Offset).setMIFlag(Flag);
3042  }
3043  }
3044 }
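// A worked example: emitFrameOffset with Offset = 4112 and DestReg = SrcReg =
// sp first emits "add sp, sp, #1, lsl #12" (4096 bytes) from the loop above
// and then "add sp, sp, #16" for the remainder, since only a 12-bit immediate
// (optionally shifted by 12) is encodable per instruction.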
3045 
3046 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3047  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3048  MachineBasicBlock::iterator InsertPt, int FrameIndex,
3049  LiveIntervals *LIS) const {
3050  // This is a bit of a hack. Consider this instruction:
3051  //
3052  // %0 = COPY %sp; GPR64all:%0
3053  //
3054  // We explicitly chose GPR64all for the virtual register so such a copy might
3055  // be eliminated by RegisterCoalescer. However, that may not be possible, and
3056  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3057  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3058  //
3059  // To prevent that, we are going to constrain the %0 register class here.
3060  //
3061  // <rdar://problem/11522048>
3062  //
3063  if (MI.isFullCopy()) {
3064  unsigned DstReg = MI.getOperand(0).getReg();
3065  unsigned SrcReg = MI.getOperand(1).getReg();
3066  if (SrcReg == AArch64::SP &&
3068  MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3069  return nullptr;
3070  }
3071  if (DstReg == AArch64::SP &&
3073  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3074  return nullptr;
3075  }
3076  }
3077 
3078  // Handle the case where a copy is being spilled or filled but the source
3079  // and destination register class don't match. For example:
3080  //
3081  // %0 = COPY %xzr; GPR64common:%0
3082  //
3083  // In this case we can still safely fold away the COPY and generate the
3084  // following spill code:
3085  //
3086  // STRXui %xzr, %stack.0
3087  //
3088  // This also eliminates spilled cross register class COPYs (e.g. between x and
3089  // d regs) of the same size. For example:
3090  //
3091  // %0 = COPY %1; GPR64:%0, FPR64:%1
3092  //
3093  // will be filled as
3094  //
3095  // LDRDui %0, fi<#0>
3096  //
3097  // instead of
3098  //
3099  // LDRXui %Temp, fi<#0>
3100  // %0 = FMOV %Temp
3101  //
3102  if (MI.isCopy() && Ops.size() == 1 &&
3103  // Make sure we're only folding the explicit COPY defs/uses.
3104  (Ops[0] == 0 || Ops[0] == 1)) {
3105  bool IsSpill = Ops[0] == 0;
3106  bool IsFill = !IsSpill;
3108  const MachineRegisterInfo &MRI = MF.getRegInfo();
3109  MachineBasicBlock &MBB = *MI.getParent();
3110  const MachineOperand &DstMO = MI.getOperand(0);
3111  const MachineOperand &SrcMO = MI.getOperand(1);
3112  unsigned DstReg = DstMO.getReg();
3113  unsigned SrcReg = SrcMO.getReg();
3114  // This is slightly expensive to compute for physical regs since
3115  // getMinimalPhysRegClass is slow.
3116  auto getRegClass = [&](unsigned Reg) {
3118  ? MRI.getRegClass(Reg)
3119  : TRI.getMinimalPhysRegClass(Reg);
3120  };
3121 
3122  if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3123  assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3124  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3125  "Mismatched register size in non subreg COPY");
3126  if (IsSpill)
3127  storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3128  getRegClass(SrcReg), &TRI);
3129  else
3130  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3131  getRegClass(DstReg), &TRI);
3132  return &*--InsertPt;
3133  }
3134 
3135  // Handle cases like spilling def of:
3136  //
3137  // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3138  //
3139  // where the physical register source can be widened and stored to the full
3140  // virtual reg destination stack slot, in this case producing:
3141  //
3142  // STRXui %xzr, %stack.0
3143  //
3144  if (IsSpill && DstMO.isUndef() &&
3146  assert(SrcMO.getSubReg() == 0 &&
3147  "Unexpected subreg on physical register");
3148  const TargetRegisterClass *SpillRC;
3149  unsigned SpillSubreg;
3150  switch (DstMO.getSubReg()) {
3151  default:
3152  SpillRC = nullptr;
3153  break;
3154  case AArch64::sub_32:
3155  case AArch64::ssub:
3156  if (AArch64::GPR32RegClass.contains(SrcReg)) {
3157  SpillRC = &AArch64::GPR64RegClass;
3158  SpillSubreg = AArch64::sub_32;
3159  } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3160  SpillRC = &AArch64::FPR64RegClass;
3161  SpillSubreg = AArch64::ssub;
3162  } else
3163  SpillRC = nullptr;
3164  break;
3165  case AArch64::dsub:
3166  if (AArch64::FPR64RegClass.contains(SrcReg)) {
3167  SpillRC = &AArch64::FPR128RegClass;
3168  SpillSubreg = AArch64::dsub;
3169  } else
3170  SpillRC = nullptr;
3171  break;
3172  }
3173 
3174  if (SpillRC)
3175  if (unsigned WidenedSrcReg =
3176  TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3177  storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3178  FrameIndex, SpillRC, &TRI);
3179  return &*--InsertPt;
3180  }
3181  }
3182 
3183  // Handle cases like filling use of:
3184  //
3185  // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3186  //
3187  // where we can load the full virtual reg source stack slot, into the subreg
3188  // destination, in this case producing:
3189  //
3190  // LDRWui %0:sub_32<def,read-undef>, %stack.0
3191  //
3192  if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3193  const TargetRegisterClass *FillRC;
3194  switch (DstMO.getSubReg()) {
3195  default:
3196  FillRC = nullptr;
3197  break;
3198  case AArch64::sub_32:
3199  FillRC = &AArch64::GPR32RegClass;
3200  break;
3201  case AArch64::ssub:
3202  FillRC = &AArch64::FPR32RegClass;
3203  break;
3204  case AArch64::dsub:
3205  FillRC = &AArch64::FPR64RegClass;
3206  break;
3207  }
3208 
3209  if (FillRC) {
3210  assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3211  TRI.getRegSizeInBits(*FillRC) &&
3212  "Mismatched regclass size on folded subreg COPY");
3213  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3214  MachineInstr &LoadMI = *--InsertPt;
3215  MachineOperand &LoadDst = LoadMI.getOperand(0);
3216  assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3217  LoadDst.setSubReg(DstMO.getSubReg());
3218  LoadDst.setIsUndef();
3219  return &LoadMI;
3220  }
3221  }
3222  }
3223 
3224  // Cannot fold.
3225  return nullptr;
3226 }
3227 
3228 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
3229  bool *OutUseUnscaledOp,
3230  unsigned *OutUnscaledOp,
3231  int *EmittableOffset) {
3232  // Set output values in case of early exit.
3233  if (EmittableOffset)
3234  *EmittableOffset = 0;
3235  if (OutUseUnscaledOp)
3236  *OutUseUnscaledOp = false;
3237  if (OutUnscaledOp)
3238  *OutUnscaledOp = 0;
3239 
3240  // Exit early for structured vector spills/fills as they can't take an
3241  // immediate offset.
3242  switch (MI.getOpcode()) {
3243  default:
3244  break;
3245  case AArch64::LD1Twov2d:
3246  case AArch64::LD1Threev2d:
3247  case AArch64::LD1Fourv2d:
3248  case AArch64::LD1Twov1d:
3249  case AArch64::LD1Threev1d:
3250  case AArch64::LD1Fourv1d:
3251  case AArch64::ST1Twov2d:
3252  case AArch64::ST1Threev2d:
3253  case AArch64::ST1Fourv2d:
3254  case AArch64::ST1Twov1d:
3255  case AArch64::ST1Threev1d:
3256  case AArch64::ST1Fourv1d:
3257  return AArch64FrameOffsetCannotUpdate;
3258  }
3259 
3260  // Get the min/max offset and the scale.
3261  unsigned Scale, Width;
3262  int64_t MinOff, MaxOff;
3263  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
3264  MaxOff))
3265  llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3266 
3267  // Construct the complete offset.
3268  const MachineOperand &ImmOpnd =
3270  Offset += ImmOpnd.getImm() * Scale;
3271 
3272  // If the offset doesn't match the scale, we rewrite the instruction to
3273  // use the unscaled instruction instead. Likewise, if we have a negative
3274  // offset and there is an unscaled op to use.
3275  Optional<unsigned> UnscaledOp =
3277  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
3278  if (useUnscaledOp &&
3279  !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
3280  llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3281 
3282  int64_t Remainder = Offset % Scale;
3283  assert(!(Remainder && useUnscaledOp) &&
3284  "Cannot have remainder when using unscaled op");
3285 
3286  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
3287  int NewOffset = Offset / Scale;
3288  if (MinOff <= NewOffset && NewOffset <= MaxOff)
3289  Offset = Remainder;
3290  else {
3291  NewOffset = NewOffset < 0 ? MinOff : MaxOff;
3292  Offset = Offset - NewOffset * Scale + Remainder;
3293  }
3294 
3295  if (EmittableOffset)
3296  *EmittableOffset = NewOffset;
3297  if (OutUseUnscaledOp)
3298  *OutUseUnscaledOp = useUnscaledOp;
3299  if (OutUnscaledOp && UnscaledOp)
3300  *OutUnscaledOp = *UnscaledOp;
3301 
3302  return AArch64FrameOffsetCanUpdate |
3303  (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3304 }
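// A worked example: for an LDRXui whose accumulated offset is 4104 bytes, the
// scaled offset 4104 / 8 = 513 lies inside the [0, 4095] range reported by
// getMemOpInfo, so *EmittableOffset is 513, the remaining Offset is 0, and
// the offset is reported as both updatable and legal.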
3305 
3306 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3307  unsigned FrameReg, int &Offset,
3308  const AArch64InstrInfo *TII) {
3309  unsigned Opcode = MI.getOpcode();
3310  unsigned ImmIdx = FrameRegIdx + 1;
3311 
3312  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3313  Offset += MI.getOperand(ImmIdx).getImm();
3314  emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3315  MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3316  MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3317  MI.eraseFromParent();
3318  Offset = 0;
3319  return true;
3320  }
3321 
3322  int NewOffset;
3323  unsigned UnscaledOp;
3324  bool UseUnscaledOp;
3325  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3326  &UnscaledOp, &NewOffset);
3327  if (Status & AArch64FrameOffsetCanUpdate) {
3328  if (Status & AArch64FrameOffsetIsLegal)
3329  // Replace the FrameIndex with FrameReg.
3330  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3331  if (UseUnscaledOp)
3332  MI.setDesc(TII->get(UnscaledOp));
3333 
3334  MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3335  return Offset == 0;
3336  }
3337 
3338  return false;
3339 }
3340 
3341 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3342  NopInst.setOpcode(AArch64::HINT);
3343  NopInst.addOperand(MCOperand::createImm(0));
3344 }
3345 
3346 // AArch64 supports MachineCombiner.
3347 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3348 
3349 // True when Opc sets flags
3350 static bool isCombineInstrSettingFlag(unsigned Opc) {
3351  switch (Opc) {
3352  case AArch64::ADDSWrr:
3353  case AArch64::ADDSWri:
3354  case AArch64::ADDSXrr:
3355  case AArch64::ADDSXri:
3356  case AArch64::SUBSWrr:
3357  case AArch64::SUBSXrr:
3358  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3359  case AArch64::SUBSWri:
3360  case AArch64::SUBSXri:
3361  return true;
3362  default:
3363  break;
3364  }
3365  return false;
3366 }
3367 
3368 // 32b Opcodes that can be combined with a MUL
3369 static bool isCombineInstrCandidate32(unsigned Opc) {
3370  switch (Opc) {
3371  case AArch64::ADDWrr:
3372  case AArch64::ADDWri:
3373  case AArch64::SUBWrr:
3374  case AArch64::ADDSWrr:
3375  case AArch64::ADDSWri:
3376  case AArch64::SUBSWrr:
3377  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3378  case AArch64::SUBWri:
3379  case AArch64::SUBSWri:
3380  return true;
3381  default:
3382  break;
3383  }
3384  return false;
3385 }
3386 
3387 // 64b Opcodes that can be combined with a MUL
3388 static bool isCombineInstrCandidate64(unsigned Opc) {
3389  switch (Opc) {
3390  case AArch64::ADDXrr:
3391  case AArch64::ADDXri:
3392  case AArch64::SUBXrr:
3393  case AArch64::ADDSXrr:
3394  case AArch64::ADDSXri:
3395  case AArch64::SUBSXrr:
3396  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3397  case AArch64::SUBXri:
3398  case AArch64::SUBSXri:
3399  return true;
3400  default:
3401  break;
3402  }
3403  return false;
3404 }
3405 
3406 // FP Opcodes that can be combined with a FMUL
3407 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3408  switch (Inst.getOpcode()) {
3409  default:
3410  break;
3411  case AArch64::FADDSrr:
3412  case AArch64::FADDDrr:
3413  case AArch64::FADDv2f32:
3414  case AArch64::FADDv2f64:
3415  case AArch64::FADDv4f32:
3416  case AArch64::FSUBSrr:
3417  case AArch64::FSUBDrr:
3418  case AArch64::FSUBv2f32:
3419  case AArch64::FSUBv2f64:
3420  case AArch64::FSUBv4f32:
3421  TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3422  return (Options.UnsafeFPMath ||
3423  Options.AllowFPOpFusion == FPOpFusion::Fast);
3424  }
3425  return false;
3426 }
3427 
3428 // Opcodes that can be combined with a MUL
3429 static bool isCombineInstrCandidate(unsigned Opc) {
3430  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3431 }
3432 
3433 //
3434 // Utility routine that checks if \param MO is defined by an
3435 // \param CombineOpc instruction in the basic block \param MBB
3436 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3437  unsigned CombineOpc, unsigned ZeroReg = 0,
3438  bool CheckZeroReg = false) {
3439  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3440  MachineInstr *MI = nullptr;
3441 
3443  MI = MRI.getUniqueVRegDef(MO.getReg());
3444  // And it needs to be in the trace (otherwise, it won't have a depth).
3445  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3446  return false;
3447  // Must only be used by the user we combine with.
3448  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3449  return false;
3450 
3451  if (CheckZeroReg) {
3452  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3453  MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3454  MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3455  // The third input reg must be zero.
3456  if (MI->getOperand(3).getReg() != ZeroReg)
3457  return false;
3458  }
3459 
3460  return true;
3461 }
3462 
3463 //
3464 // Is \param MO defined by an integer multiply and can be combined?
3466  unsigned MulOpc, unsigned ZeroReg) {
3467  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3468 }
3469 
3470 //
3471 // Is \param MO defined by a floating-point multiply and can be combined?
3472 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3473  unsigned MulOpc) {
3474  return canCombine(MBB, MO, MulOpc);
3475 }
3476 
3477 // TODO: There are many more machine instruction opcodes to match:
3478 // 1. Other data types (integer, vectors)
3479 // 2. Other math / logic operations (xor, or)
3480 // 3. Other forms of the same operation (intrinsics and other variants)
3481 bool AArch64InstrInfo::isAssociativeAndCommutative(
3482  const MachineInstr &Inst) const {
3483  switch (Inst.getOpcode()) {
3484  case AArch64::FADDDrr:
3485  case AArch64::FADDSrr:
3486  case AArch64::FADDv2f32:
3487  case AArch64::FADDv2f64:
3488  case AArch64::FADDv4f32:
3489  case AArch64::FMULDrr:
3490  case AArch64::FMULSrr:
3491  case AArch64::FMULX32:
3492  case AArch64::FMULX64:
3493  case AArch64::FMULXv2f32:
3494  case AArch64::FMULXv2f64:
3495  case AArch64::FMULXv4f32:
3496  case AArch64::FMULv2f32:
3497  case AArch64::FMULv2f64:
3498  case AArch64::FMULv4f32:
3499  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3500  default:
3501  return false;
3502  }
3503 }
3504 
3505 /// Find instructions that can be turned into madd.
3506 static bool getMaddPatterns(MachineInstr &Root,
3507  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3508  unsigned Opc = Root.getOpcode();
3509  MachineBasicBlock &MBB = *Root.getParent();
3510  bool Found = false;
3511 
3512  if (!isCombineInstrCandidate(Opc))
3513  return false;
3514  if (isCombineInstrSettingFlag(Opc)) {
3515  int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3516  // When NZCV is live, bail out.
3517  if (Cmp_NZCV == -1)
3518  return false;
3519  unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3520  // When the opcode can't be changed, bail out.
3521  // CHECKME: do we miss any cases for opcode conversion?
3522  if (NewOpc == Opc)
3523  return false;
3524  Opc = NewOpc;
3525  }
3526 
3527  switch (Opc) {
3528  default:
3529  break;
3530  case AArch64::ADDWrr:
3531  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3532  "ADDWrr does not have register operands");
3533  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3534  AArch64::WZR)) {
3536  Found = true;
3537  }
3538  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3539  AArch64::WZR)) {
3541  Found = true;
3542  }
3543  break;
3544  case AArch64::ADDXrr:
3545  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3546  AArch64::XZR)) {
3548  Found = true;
3549  }
3550  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3551  AArch64::XZR)) {
3553  Found = true;
3554  }
3555  break;
3556  case AArch64::SUBWrr:
3557  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3558  AArch64::WZR)) {
3560  Found = true;
3561  }
3562  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3563  AArch64::WZR)) {
3565  Found = true;
3566  }
3567  break;
3568  case AArch64::SUBXrr:
3569  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3570  AArch64::XZR)) {
3572  Found = true;
3573  }
3574  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3575  AArch64::XZR)) {
3577  Found = true;
3578  }
3579  break;
3580  case AArch64::ADDWri:
3581  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3582  AArch64::WZR)) {
3584  Found = true;
3585  }
3586  break;
3587  case AArch64::ADDXri:
3588  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3589  AArch64::XZR)) {
3591  Found = true;
3592  }
3593  break;
3594  case AArch64::SUBWri:
3595  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3596  AArch64::WZR)) {
3598  Found = true;
3599  }
3600  break;
3601  case AArch64::SUBXri:
3602  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3603  AArch64::XZR)) {
3605  Found = true;
3606  }
3607  break;
3608  }
3609  return Found;
3610 }
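// For illustration (hypothetical registers), the MULADDW_OP1 pattern searched
// for above corresponds to the rewrite
//   mul  w8, w0, w1
//   add  w0, w8, w2
// ==>
//   madd w0, w0, w1, w2
// and the MULSUBW_OP2 pattern corresponds to
//   mul  w8, w0, w1
//   sub  w0, w2, w8
// ==>
//   msub w0, w0, w1, w2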
3611 /// Floating-Point Support
3612 
3613 /// Find instructions that can be turned into a fused multiply-add (fmadd/fmla).
3614 static bool getFMAPatterns(MachineInstr &Root,
3615  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3616 
3617  if (!isCombineInstrCandidateFP(Root))
3618  return false;
3619 
3620  MachineBasicBlock &MBB = *Root.getParent();
3621  bool Found = false;
3622 
3623  switch (Root.getOpcode()) {
3624  default:
3625  assert(false && "Unsupported FP instruction in combiner\n");
3626  break;
3627  case AArch64::FADDSrr:
3628  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3629  "FADDWrr does not have register operands");
3630  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3632  Found = true;
3633  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3634  AArch64::FMULv1i32_indexed)) {
3636  Found = true;
3637  }
3638  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3640  Found = true;
3641  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3642  AArch64::FMULv1i32_indexed)) {
3644  Found = true;
3645  }
3646  break;
3647  case AArch64::FADDDrr:
3648  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3650  Found = true;
3651  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3652  AArch64::FMULv1i64_indexed)) {
3654  Found = true;
3655  }
3656  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3658  Found = true;
3659  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3660  AArch64::FMULv1i64_indexed)) {
3662  Found = true;
3663  }
3664  break;
3665  case AArch64::FADDv2f32:
3666  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3667  AArch64::FMULv2i32_indexed)) {
3669  Found = true;
3670  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3671  AArch64::FMULv2f32)) {
3673  Found = true;
3674  }
3675  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3676  AArch64::FMULv2i32_indexed)) {
3678  Found = true;
3679  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3680  AArch64::FMULv2f32)) {
3682  Found = true;
3683  }
3684  break;
3685  case AArch64::FADDv2f64:
3686  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3687  AArch64::FMULv2i64_indexed)) {
3689  Found = true;
3690  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3691  AArch64::FMULv2f64)) {
3693  Found = true;
3694  }
3695  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3696  AArch64::FMULv2i64_indexed)) {
3698  Found = true;
3699  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3700  AArch64::FMULv2f64)) {
3702  Found = true;
3703  }
3704  break;
3705  case AArch64::FADDv4f32:
3706  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3707  AArch64::FMULv4i32_indexed)) {
3709  Found = true;
3710  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3711  AArch64::FMULv4f32)) {
3713  Found = true;
3714  }
3715  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3716  AArch64::FMULv4i32_indexed)) {
3718  Found = true;
3719  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3720  AArch64::FMULv4f32)) {
3722  Found = true;
3723  }
3724  break;
3725 
3726  case AArch64::FSUBSrr:
3727  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3729  Found = true;
3730  }
3731  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3733  Found = true;
3734  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3735  AArch64::FMULv1i32_indexed)) {
3737  Found = true;
3738  }
3739  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3741  Found = true;
3742  }
3743  break;
3744  case AArch64::FSUBDrr:
3745  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3747  Found = true;
3748  }
3749  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3751  Found = true;
3752  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3753  AArch64::FMULv1i64_indexed)) {
3755  Found = true;
3756  }
3757  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3759  Found = true;
3760  }
3761  break;
3762  case AArch64::FSUBv2f32:
3763  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3764  AArch64::FMULv2i32_indexed)) {
3766  Found = true;
3767  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3768  AArch64::FMULv2f32)) {
3770  Found = true;
3771  }
3772  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3773  AArch64::FMULv2i32_indexed)) {
3775  Found = true;
3776  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3777  AArch64::FMULv2f32)) {
3779  Found = true;
3780  }
3781  break;
3782  case AArch64::FSUBv2f64:
3783  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3784  AArch64::FMULv2i64_indexed)) {
3786  Found = true;
3787  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3788  AArch64::FMULv2f64)) {
3790  Found = true;
3791  }
3792  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3793  AArch64::FMULv2i64_indexed)) {
3795  Found = true;
3796  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3797  AArch64::FMULv2f64)) {
3799  Found = true;
3800  }
3801  break;
3802  case AArch64::FSUBv4f32:
3803  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3804  AArch64::FMULv4i32_indexed)) {
3806  Found = true;
3807  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3808  AArch64::FMULv4f32)) {
3810  Found = true;
3811  }
3812  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3813  AArch64::FMULv4i32_indexed)) {
3815  Found = true;
3816  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3817  AArch64::FMULv4f32)) {
3819  Found = true;
3820  }
3821  break;
3822  }
3823  return Found;
3824 }
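// For illustration (hypothetical registers), an indexed FMLA pattern found
// above corresponds to the rewrite
//   fmul v4.2s, v0.2s, v1.s[0]
//   fadd v2.2s, v4.2s, v2.2s
// ==>
//   fmla v2.2s, v0.2s, v1.s[0]
// where the addend register becomes the accumulator of the fused instruction.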
3825 
3826 /// Return true when a code sequence can improve throughput. It
3827 /// should be called only for instructions in loops.
3828 /// \param Pattern - combiner pattern
3829 bool AArch64InstrInfo::isThroughputPattern(
3830  MachineCombinerPattern Pattern) const {
3831  switch (Pattern) {
3832  default:
3833  break;
3868  return true;
3869  } // end switch (Pattern)
3870  return false;
3871 }
3872 /// Return true when there is potentially a faster code sequence for an
3873 /// instruction chain ending in \p Root. All potential patterns are listed in
3874 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3875 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3876 
3877 bool AArch64InstrInfo::getMachineCombinerPatterns(
3878  MachineInstr &Root,
3879  SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3880  // Integer patterns
3881  if (getMaddPatterns(Root, Patterns))
3882  return true;
3883  // Floating point patterns
3884  if (getFMAPatterns(Root, Patterns))
3885  return true;
3886 
3887  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3888 }
3889 
3891 /// genFusedMultiply - Generate fused multiply instructions.
3892 /// This function supports both integer and floating point instructions.
3893 /// A typical example:
3894 /// F|MUL I=A,B,0
3895 /// F|ADD R,I,C
3896 /// ==> F|MADD R,A,B,C
3897 /// \param MF Containing MachineFunction
3898 /// \param MRI Register information
3899 /// \param TII Target information
3900 /// \param Root is the F|ADD instruction
3901 /// \param [out] InsInstrs is a vector of machine instructions and will
3902 /// contain the generated madd instruction
3903 /// \param IdxMulOpd is index of operand in Root that is the result of
3904 /// the F|MUL. In the example above IdxMulOpd is 1.
3905 /// \param MaddOpc the opcode of the f|madd instruction
3906 /// \param RC Register class of operands
3907 /// \param kind the kind of fma instruction (addressing mode) to be generated
3908 /// \param ReplacedAddend is the result register from the instruction
3909 /// replacing the non-combined operand, if any.
3910 static MachineInstr *
3911 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3912  const TargetInstrInfo *TII, MachineInstr &Root,
3913  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3914  unsigned MaddOpc, const TargetRegisterClass *RC,
3915  FMAInstKind kind = FMAInstKind::Default,
3916  const unsigned *ReplacedAddend = nullptr) {
3917  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3918 
3919  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3920  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3921  unsigned ResultReg = Root.getOperand(0).getReg();
3922  unsigned SrcReg0 = MUL->getOperand(1).getReg();
3923  bool Src0IsKill = MUL->getOperand(1).isKill();
3924  unsigned SrcReg1 = MUL->getOperand(2).getReg();
3925  bool Src1IsKill = MUL->getOperand(2).isKill();
3926 
3927  unsigned SrcReg2;
3928  bool Src2IsKill;
3929  if (ReplacedAddend) {
3930  // If we just generated a new addend, we must be its only use.
3931  SrcReg2 = *ReplacedAddend;
3932  Src2IsKill = true;
3933  } else {
3934  SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3935  Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3936  }
3937 
3939  MRI.constrainRegClass(ResultReg, RC);
3941  MRI.constrainRegClass(SrcReg0, RC);
3943  MRI.constrainRegClass(SrcReg1, RC);
3945  MRI.constrainRegClass(SrcReg2, RC);
3946 
3947  MachineInstrBuilder MIB;
3948  if (kind == FMAInstKind::Default)
3949  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3950  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3951  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3952  .addReg(SrcReg2, getKillRegState(Src2IsKill));
3953  else if (kind == FMAInstKind::Indexed)
3954  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3955  .addReg(SrcReg2, getKillRegState(Src2IsKill))
3956  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3957  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3958  .addImm(MUL->getOperand(3).getImm());
3959  else if (kind == FMAInstKind::Accumulator)
3960  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3961  .addReg(SrcReg2, getKillRegState(Src2IsKill))
3962  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3963  .addReg(SrcReg1, getKillRegState(Src1IsKill));
3964  else
3965  assert(false && "Invalid FMA instruction kind \n");
3966  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
3967  InsInstrs.push_back(MIB);
3968  return MUL;
3969 }
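// Reading aid for the operand orders built above: FMAInstKind::Default emits
// (MulOp0, MulOp1, Addend), matching MADD/FMADD, e.g.
//   fmadd s0, s1, s2, s3        // s0 = s1*s2 + s3
// while the Indexed and Accumulator kinds put the addend first,
// (Addend, MulOp0, MulOp1[, lane]), matching FMLA/FMLS-style instructions, e.g.
//   fmla v0.2s, v1.2s, v2.2s    // v0 += v1*v2   (hypothetical registers)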
3970 
3971 /// genMaddR - Generate madd instruction and combine mul and add using
3972 /// an extra virtual register
3973 /// Example - an ADD intermediate needs to be stored in a register:
3974 /// MUL I=A,B,0
3975 /// ADD R,I,Imm
3976 /// ==> ORR V, ZR, Imm
3977 /// ==> MADD R,A,B,V
3978 /// \param MF Containing MachineFunction
3979 /// \param MRI Register information
3980 /// \param TII Target information
3981 /// \param Root is the ADD instruction
3982 /// \param [out] InsInstrs is a vector of machine instructions and will
3983 /// contain the generated madd instruction
3984 /// \param IdxMulOpd is index of operand in Root that is the result of
3985 /// the MUL. In the example above IdxMulOpd is 1.
3986 /// \param MaddOpc the opcode of the madd instruction
3987 /// \param VR is a virtual register that holds the value of an ADD operand
3988 /// (V in the example above).
3989 /// \param RC Register class of operands
3990 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
3991  const TargetInstrInfo *TII, MachineInstr &Root,
3992  SmallVectorImpl<MachineInstr *> &InsInstrs,
3993  unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
3994  const TargetRegisterClass *RC) {
3995  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3996 
3997  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3998  unsigned ResultReg = Root.getOperand(0).getReg();
3999  unsigned SrcReg0 = MUL->getOperand(1).getReg();
4000  bool Src0IsKill = MUL->getOperand(1).isKill();
4001  unsigned SrcReg1 = MUL->getOperand(2).getReg();
4002  bool Src1IsKill = MUL->getOperand(2).isKill();
4003 
4005  MRI.constrainRegClass(ResultReg, RC);
4007  MRI.constrainRegClass(SrcReg0, RC);
4009  MRI.constrainRegClass(SrcReg1, RC);
4011  MRI.constrainRegClass(VR, RC);
4012 
4013  MachineInstrBuilder MIB =
4014  BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4015  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4016  .addReg(SrcReg1, getKillRegState(Src1IsKill))
4017  .addReg(VR);
4018  // Insert the MADD
4019  InsInstrs.push_back(MIB);
4020  return MUL;
4021 }
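// Concretely (hypothetical registers and immediate), genMaddR enables rewrites
// such as
//   mul  w8, w0, w1
//   add  w0, w8, #7
// ==>
//   orr  w9, wzr, #7        // materialize the immediate into VR
//   madd w0, w0, w1, w9
// provided the immediate is encodable as a logical immediate for the ORR.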
4022 
4023 /// When getMachineCombinerPatterns() finds potential patterns,
4024 /// this function generates the instructions that could replace the
4025 /// original code sequence
4026 void AArch64InstrInfo::genAlternativeCodeSequence(
4027  MachineInstr &Root, MachineCombinerPattern Pattern,
4028  SmallVectorImpl<MachineInstr *> &InsInstrs,
4029  SmallVectorImpl<MachineInstr *> &DelInstrs,
4030  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4031  MachineBasicBlock &MBB = *Root.getParent();
4032  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4033  MachineFunction &MF = *MBB.getParent();
4034  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4035 
4036  MachineInstr *MUL;
4037  const TargetRegisterClass *RC;
4038  unsigned Opc;
4039  switch (Pattern) {
4040  default:
4041  // Reassociate instructions.
4042  TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4043  DelInstrs, InstrIdxForVirtReg);
4044  return;
4047  // MUL I=A,B,0
4048  // ADD R,I,C
4049  // ==> MADD R,A,B,C
4050  // --- Create(MADD);
4051  if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4052  Opc = AArch64::MADDWrrr;
4053  RC = &AArch64::GPR32RegClass;
4054  } else {
4055  Opc = AArch64::MADDXrrr;
4056  RC = &AArch64::GPR64RegClass;
4057  }
4058  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4059  break;
4062  // MUL I=A,B,0
4063  // ADD R,C,I
4064  // ==> MADD R,A,B,C
4065  // --- Create(MADD);
4066  if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4067  Opc = AArch64::MADDWrrr;
4068  RC = &AArch64::GPR32RegClass;
4069  } else {
4070  Opc = AArch64::MADDXrrr;
4071  RC = &AArch64::GPR64RegClass;
4072  }
4073  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4074  break;
4077  // MUL I=A,B,0
4078  // ADD R,I,Imm
4079  // ==> ORR V, ZR, Imm
4080  // ==> MADD R,A,B,V
4081  // --- Create(MADD);
4082  const TargetRegisterClass *OrrRC;
4083  unsigned BitSize, OrrOpc, ZeroReg;
4084  if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4085  OrrOpc = AArch64::ORRWri;
4086  OrrRC = &AArch64::GPR32spRegClass;
4087  BitSize = 32;
4088  ZeroReg = AArch64::WZR;
4089  Opc = AArch64::MADDWrrr;
4090  RC = &AArch64::GPR32RegClass;
4091  } else {
4092  OrrOpc = AArch64::ORRXri;
4093  OrrRC = &AArch64::GPR64spRegClass;
4094  BitSize = 64;
4095  ZeroReg = AArch64::XZR;
4096  Opc = AArch64::MADDXrrr;
4097  RC = &AArch64::GPR64RegClass;
4098  }
4099  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4100  uint64_t Imm = Root.getOperand(2).getImm();
4101 
4102  if (Root.getOperand(3).isImm()) {
4103  unsigned Val = Root.getOperand(3).getImm();
4104  Imm = Imm << Val;
4105  }
4106  uint64_t UImm = SignExtend64(Imm, BitSize);
4107  uint64_t Encoding;
4108  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4109  MachineInstrBuilder MIB1 =
4110  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4111  .addReg(ZeroReg)
4112  .addImm(Encoding);
4113  InsInstrs.push_back(MIB1);
4114  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4115  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4116  }
4117  break;
4118  }
4121  // MUL I=A,B,0
4122  // SUB R,I, C
4123  // ==> SUB V, 0, C
4124  // ==> MADD R,A,B,V // = -C + A*B
4125  // --- Create(MADD);
4126  const TargetRegisterClass *SubRC;
4127  unsigned SubOpc, ZeroReg;
4128  if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4129  SubOpc = AArch64::SUBWrr;
4130  SubRC = &AArch64::GPR32spRegClass;
4131  ZeroReg = AArch64::WZR;
4132  Opc = AArch64::MADDWrrr;
4133  RC = &AArch64::GPR32RegClass;
4134  } else {
4135  SubOpc = AArch64::SUBXrr;
4136  SubRC = &AArch64::GPR64spRegClass;
4137  ZeroReg = AArch64::XZR;
4138  Opc = AArch64::MADDXrrr;
4139  RC = &AArch64::GPR64RegClass;
4140  }
4141  unsigned NewVR = MRI.createVirtualRegister(SubRC);
4142  // SUB NewVR, 0, C
4143  MachineInstrBuilder MIB1 =
4144  BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4145  .addReg(ZeroReg)
4146  .add(Root.getOperand(2));
4147  InsInstrs.push_back(MIB1);
4148  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4149  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4150  break;
4151  }
4154  // MUL I=A,B,0
4155  // SUB R,C,I
4156  // ==> MSUB R,A,B,C (computes C - A*B)
4157  // --- Create(MSUB);
4158  if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4159  Opc = AArch64::MSUBWrrr;
4160  RC = &AArch64::GPR32RegClass;
4161  } else {
4162  Opc = AArch64::MSUBXrrr;
4163  RC = &AArch64::GPR64RegClass;
4164  }
4165  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4166  break;
4169  // MUL I=A,B,0
4170  // SUB R,I, Imm
4171  // ==> ORR V, ZR, -Imm
4172  // ==> MADD R,A,B,V // = -Imm + A*B
4173  // --- Create(MADD);
4174  const TargetRegisterClass *OrrRC;
4175  unsigned BitSize, OrrOpc, ZeroReg;
4176  if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4177  OrrOpc = AArch64::ORRWri;
4178  OrrRC = &AArch64::GPR32spRegClass;
4179  BitSize = 32;
4180  ZeroReg = AArch64::WZR;
4181  Opc = AArch64::MADDWrrr;
4182  RC = &AArch64::GPR32RegClass;
4183  } else {
4184  OrrOpc = AArch64::ORRXri;
4185  OrrRC = &AArch64::GPR64spRegClass;
4186  BitSize = 64;
4187  ZeroReg = AArch64::XZR;
4188  Opc = AArch64::MADDXrrr;
4189  RC = &AArch64::GPR64RegClass;
4190  }
4191  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4192  uint64_t Imm = Root.getOperand(2).getImm();
4193  if (Root.getOperand(3).isImm()) {
4194  unsigned Val = Root.getOperand(3).getImm();
4195  Imm = Imm << Val;
4196  }
4197  uint64_t UImm = SignExtend64(-Imm, BitSize);
4198  uint64_t Encoding;
4199  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4200  MachineInstrBuilder MIB1 =
4201  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4202  .addReg(ZeroReg)
4203  .addImm(Encoding);
4204  InsInstrs.push_back(MIB1);
4205  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4206  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4207  }
4208  break;
4209  }
4210  // Floating Point Support
4213  // MUL I=A,B,0
4214  // ADD R,I,C
4215  // ==> MADD R,A,B,C
4216  // --- Create(MADD);
4217  if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4218  Opc = AArch64::FMADDSrrr;
4219  RC = &AArch64::FPR32RegClass;
4220  } else {
4221  Opc = AArch64::FMADDDrrr;
4222  RC = &AArch64::FPR64RegClass;
4223  }
4224  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4225  break;
4228  // FMUL I=A,B,0
4229  // FADD R,C,I
4230  // ==> FMADD R,A,B,C
4231  // --- Create(FMADD);
4232  if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4233  Opc = AArch64::FMADDSrrr;
4234  RC = &AArch64::FPR32RegClass;
4235  } else {
4236  Opc = AArch64::FMADDDrrr;
4237  RC = &AArch64::FPR64RegClass;
4238  }
4239  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4240  break;
4241 
4243  Opc = AArch64::FMLAv1i32_indexed;
4244  RC = &AArch64::FPR32RegClass;
4245  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4247  break;
4249  Opc = AArch64::FMLAv1i32_indexed;
4250  RC = &AArch64::FPR32RegClass;
4251  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4253  break;
4254 
4256  Opc = AArch64::FMLAv1i64_indexed;
4257  RC = &AArch64::FPR64RegClass;
4258  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4260  break;
4262  Opc = AArch64::FMLAv1i64_indexed;
4263  RC = &AArch64::FPR64RegClass;
4264  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4266  break;
4267 
4270  RC = &AArch64::FPR64RegClass;
4272  Opc = AArch64::FMLAv2i32_indexed;
4273  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4275  } else {
4276  Opc = AArch64::FMLAv2f32;
4277  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4279  }
4280  break;
4283  RC = &AArch64::FPR64RegClass;
4285  Opc = AArch64::FMLAv2i32_indexed;
4286  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4288  } else {
4289  Opc = AArch64::FMLAv2f32;
4290  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4292  }
4293  break;
4294 
4297  RC = &AArch64::FPR128RegClass;
4299  Opc = AArch64::FMLAv2i64_indexed;
4300  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4302  } else {
4303  Opc = AArch64::FMLAv2f64;
4304  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4306  }
4307  break;
4310  RC = &AArch64::FPR128RegClass;
4312  Opc = AArch64::FMLAv2i64_indexed;
4313  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4315  } else {
4316  Opc = AArch64::FMLAv2f64;
4317  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4319  }
4320  break;
4321 
4324  RC = &AArch64::FPR128RegClass;
4326  Opc = AArch64::FMLAv4i32_indexed;
4327  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4329  } else {
4330  Opc = AArch64::FMLAv4f32;
4331  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4333  }
4334  break;
4335 
4338  RC = &AArch64::FPR128RegClass;
4340  Opc = AArch64::FMLAv4i32_indexed;
4341  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4343  } else {
4344  Opc = AArch64::FMLAv4f32;
4345  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4347  }
4348  break;
4349 
4352  // FMUL I=A,B,0
4353  // FSUB R,I,C
4354  // ==> FNMSUB R,A,B,C // = -C + A*B
4355  // --- Create(FNMSUB);
4356  if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4357  Opc = AArch64::FNMSUBSrrr;
4358  RC = &AArch64::FPR32RegClass;
4359  } else {
4360  Opc = AArch64::FNMSUBDrrr;
4361  RC = &AArch64::FPR64RegClass;
4362  }
4363  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4364  break;
4365  }
4366 
4369  // FNMUL I=A,B,0
4370  // FSUB R,I,C
4371  // ==> FNMADD R,A,B,C // = -A*B - C
4372  // --- Create(FNMADD);
4373  if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4374  Opc = AArch64::FNMADDSrrr;
4375  RC = &AArch64::FPR32RegClass;
4376  } else {
4377  Opc = AArch64::FNMADDDrrr;
4378  RC = &AArch64::FPR64RegClass;
4379  }
4380  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4381  break;
4382  }
4383 
4386  // FMUL I=A,B,0
4387  // FSUB R,C,I
4388  // ==> FMSUB R,A,B,C (computes C - A*B)
4389  // --- Create(FMSUB);
4390  if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4391  Opc = AArch64::FMSUBSrrr;
4392  RC = &AArch64::FPR32RegClass;
4393  } else {
4394  Opc = AArch64::FMSUBDrrr;
4395  RC = &AArch64::FPR64RegClass;
4396  }
4397  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4398  break;
4399  }
4400 
4402  Opc = AArch64::FMLSv1i32_indexed;
4403  RC = &AArch64::FPR32RegClass;
4404  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4406  break;
4407 
4409  Opc = AArch64::FMLSv1i64_indexed;
4410  RC = &AArch64::FPR64RegClass;
4411  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4413  break;
4414 
4417  RC = &AArch64::FPR64RegClass;
4419  Opc = AArch64::FMLSv2i32_indexed;
4420  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4422  } else {
4423  Opc = AArch64::FMLSv2f32;
4424  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4426  }
4427  break;
4428 
4431  RC = &AArch64::FPR128RegClass;
4433  Opc = AArch64::FMLSv2i64_indexed;
4434  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4436  } else {
4437  Opc = AArch64::FMLSv2f64;
4438  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4440  }
4441  break;
4442 
4445  RC = &AArch64::FPR128RegClass;
4447  Opc = AArch64::FMLSv4i32_indexed;
4448  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4450  } else {
4451  Opc = AArch64::FMLSv4f32;
4452  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4454  }
4455  break;
4458  RC = &AArch64::FPR64RegClass;
4459  unsigned NewVR = MRI.createVirtualRegister(RC);
4460  MachineInstrBuilder MIB1 =
4461  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4462  .add(Root.getOperand(2));
4463  InsInstrs.push_back(MIB1);
4464  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4466  Opc = AArch64::FMLAv2i32_indexed;
4467  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4468  FMAInstKind::Indexed, &NewVR);
4469  } else {
4470  Opc = AArch64::FMLAv2f32;
4471  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4472  FMAInstKind::Accumulator, &NewVR);
4473  }
4474  break;
4475  }
4478  RC = &AArch64::FPR128RegClass;
4479  unsigned NewVR = MRI.createVirtualRegister(RC);
4480  MachineInstrBuilder MIB1 =
4481  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4482  .add(Root.getOperand(2));
4483  InsInstrs.push_back(MIB1);
4484  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4486  Opc = AArch64::FMLAv4i32_indexed;
4487  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4488  FMAInstKind::Indexed, &NewVR);
4489  } else {
4490  Opc = AArch64::FMLAv4f32;
4491  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4492  FMAInstKind::Accumulator, &NewVR);
4493  }
4494  break;
4495  }
4498  RC = &AArch64::FPR128RegClass;
4499  unsigned NewVR = MRI.createVirtualRegister(RC);
4500  MachineInstrBuilder MIB1 =
4501  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4502  .add(Root.getOperand(2));
4503  InsInstrs.push_back(MIB1);
4504  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4506  Opc = AArch64::FMLAv2i64_indexed;
4507  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4508  FMAInstKind::Indexed, &NewVR);
4509  } else {
4510  Opc = AArch64::FMLAv2f64;
4511  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4512  FMAInstKind::Accumulator, &NewVR);
4513  }
4514  break;
4515  }
4516  } // end switch (Pattern)
4517  // Record MUL and ADD/SUB for deletion
4518  DelInstrs.push_back(MUL);
4519  DelInstrs.push_back(&Root);
4520 }
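// A quick legend for the scalar FP opcodes chosen above (d = destination,
// n/m = multiplicands, a = addend):
//   FMADD  d = a + n*m        FMSUB  d = a - n*m
//   FNMSUB d = n*m - a        FNMADD d = -(n*m) - a
// which is why FMULSUBS_OP1 (a product minus C) maps to FNMSUB and
// FNMULSUBS_OP1 (a negated product minus C) maps to FNMADD above.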
4521 
4522 /// Replace csinc-branch sequence by simple conditional branch
4523 ///
4524 /// Examples:
4525 /// 1. \code
4526 /// csinc w9, wzr, wzr, <condition code>
4527 /// tbnz w9, #0, 0x44
4528 /// \endcode
4529 /// to
4530 /// \code
4531 /// b.<inverted condition code>
4532 /// \endcode
4533 ///
4534 /// 2. \code
4535 /// csinc w9, wzr, wzr, <condition code>
4536 /// tbz w9, #0, 0x44
4537 /// \endcode
4538 /// to
4539 /// \code
4540 /// b.<condition code>
4541 /// \endcode
4542 ///
4543 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4544 /// compare's constant operand is a power of 2.
4545 ///
4546 /// Examples:
4547 /// \code
4548 /// and w8, w8, #0x400
4549 /// cbnz w8, L1
4550 /// \endcode
4551 /// to
4552 /// \code
4553 /// tbnz w8, #10, L1
4554 /// \endcode
4555 ///
4556 /// \param MI Conditional Branch
4557 /// \return True when the simple conditional branch is generated
4558 ///
4559 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4560  bool IsNegativeBranch = false;
4561  bool IsTestAndBranch = false;
4562  unsigned TargetBBInMI = 0;
4563  switch (MI.getOpcode()) {
4564  default:
4565  llvm_unreachable("Unknown branch instruction?");
4566  case AArch64::Bcc:
4567  return false;
4568  case AArch64::CBZW:
4569  case AArch64::CBZX:
4570  TargetBBInMI = 1;
4571  break;
4572  case AArch64::CBNZW:
4573  case AArch64::CBNZX:
4574  TargetBBInMI = 1;
4575  IsNegativeBranch = true;
4576  break;
4577  case AArch64::TBZW:
4578  case AArch64::TBZX:
4579  TargetBBInMI = 2;
4580  IsTestAndBranch = true;
4581  break;
4582  case AArch64::TBNZW:
4583  case AArch64::TBNZX:
4584  TargetBBInMI = 2;
4585  IsNegativeBranch = true;
4586  IsTestAndBranch = true;
4587  break;
4588  }
4589  // So we increment a zero register and test for bits other
4590  // than bit 0? Conservatively bail out in case the verifier
4591  // missed this case.
4592  if (IsTestAndBranch && MI.getOperand(1).getImm())
4593  return false;
4594 
4595  // Find Definition.
4596  assert(MI.getParent() && "Incomplete machine instruction\n");
4597  MachineBasicBlock *MBB = MI.getParent();
4598  MachineFunction *MF = MBB->getParent();
4599  MachineRegisterInfo *MRI = &MF->getRegInfo();
4600  unsigned VReg = MI.getOperand(0).getReg();
4601  if (!TargetRegisterInfo::isVirtualRegister(VReg))
4602  return false;
4603 
4604  MachineInstr *DefMI = MRI->getVRegDef(VReg);
4605 
4606  // Look through COPY instructions to find definition.
4607  while (DefMI->isCopy()) {
4608  unsigned CopyVReg = DefMI->getOperand(1).getReg();
4609  if (!MRI->hasOneNonDBGUse(CopyVReg))
4610  return false;
4611  if (!MRI->hasOneDef(CopyVReg))
4612  return false;
4613  DefMI = MRI->getVRegDef(CopyVReg);
4614  }
4615 
4616  switch (DefMI->getOpcode()) {
4617  default:
4618  return false;
4619  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4620  case AArch64::ANDWri:
4621  case AArch64::ANDXri: {
4622  if (IsTestAndBranch)
4623  return false;
4624  if (DefMI->getParent() != MBB)
4625  return false;
4626  if (!MRI->hasOneNonDBGUse(VReg))
4627  return false;
4628 
4629  bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4630  uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4631  DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4632  if (!isPowerOf2_64(Mask))
4633  return false;
4634 
4635  MachineOperand &MO = DefMI->getOperand(1);
4636  unsigned NewReg = MO.getReg();
4637  if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4638  return false;
4639 
4640  assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4641 
4642  MachineBasicBlock &RefToMBB = *MBB;
4643  MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4644  DebugLoc DL = MI.getDebugLoc();
4645  unsigned Imm = Log2_64(Mask);
4646  unsigned Opc = (Imm < 32)
4647  ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4648  : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4649  MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4650  .addReg(NewReg)
4651  .addImm(Imm)
4652  .addMBB(TBB);
4653  // The register now lives on into the new TBZ/TBNZ, so it is no longer killed here.
4654  MO.setIsKill(false);
4655 
4656  // For immediates smaller than 32, we need to use the 32-bit
4657  // variant (W) in all cases, because the 64-bit variant cannot
4658  // encode them.
4659  // Therefore, if the input register is 64-bit, we need to take the
4660  // 32-bit sub-register.
4661  if (!Is32Bit && Imm < 32)
4662  NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4663  MI.eraseFromParent();
4664  return true;
4665  }
4666  // Look for CSINC
4667  case AArch64::CSINCWr:
4668  case AArch64::CSINCXr: {
4669  if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4670  DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4671  !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4672  DefMI->getOperand(2).getReg() == AArch64::XZR))
4673  return false;
4674 
4675  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4676  return false;
4677 
4678  AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4679  // Convert only when the condition code is not modified between
4680  // the CSINC and the branch. The CC may be used by other
4681  // instructions in between.
4683  return false;
4684  MachineBasicBlock &RefToMBB = *MBB;
4685  MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4686  DebugLoc DL = MI.getDebugLoc();
4687  if (IsNegativeBranch)
4688  CC = AArch64CC::getInvertedCondCode(CC);
4689  BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4690  MI.eraseFromParent();
4691  return true;
4692  }
4693  }
4694 }
4695 
4696 std::pair<unsigned, unsigned>
4697 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4698  const unsigned Mask = AArch64II::MO_FRAGMENT;
4699  return std::make_pair(TF & Mask, TF & ~Mask);
4700 }
4701 
4704  using namespace AArch64II;
4705 
4706  static const std::pair<unsigned, const char *> TargetFlags[] = {
4707  {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4708  {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4709  {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
4710  {MO_HI12, "aarch64-hi12"}};
4711  return makeArrayRef(TargetFlags);
4712 }
4713 
4716  using namespace AArch64II;
4717 
4718  static const std::pair<unsigned, const char *> TargetFlags[] = {
4719  {MO_COFFSTUB, "aarch64-coffstub"},
4720  {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
4721  {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"},
4722  {MO_DLLIMPORT, "aarch64-dllimport"}};
4723  return makeArrayRef(TargetFlags);
4724 }
4725 
4728  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4729  {{MOSuppressPair, "aarch64-suppress-pair"},
4730  {MOStridedAccess, "aarch64-strided-access"}};
4731  return makeArrayRef(TargetFlags);
4732 }
4733 
4734 /// Constants defining how certain sequences should be outlined.
4735 /// This encompasses how an outlined function should be called, and what kind of
4736 /// frame should be emitted for that outlined function.
4737 ///
4738 /// \p MachineOutlinerDefault implies that the function should be called with
4739 /// a save and restore of LR to the stack.
4740 ///
4741 /// That is,
4742 ///
4743 /// I1 Save LR OUTLINED_FUNCTION:
4744 /// I2 --> BL OUTLINED_FUNCTION I1
4745 /// I3 Restore LR I2
4746 /// I3
4747 /// RET
4748 ///
4749 /// * Call construction overhead: 3 (save + BL + restore)
4750 /// * Frame construction overhead: 1 (ret)
4751 /// * Requires stack fixups? Yes
4752 ///
4753 /// \p MachineOutlinerTailCall implies that the function is being created from
4754 /// a sequence of instructions ending in a return.
4755 ///
4756 /// That is,
4757 ///
4758 /// I1 OUTLINED_FUNCTION:
4759 /// I2 --> B OUTLINED_FUNCTION I1
4760 /// RET I2
4761 /// RET
4762 ///
4763 /// * Call construction overhead: 1 (B)
4764 /// * Frame construction overhead: 0 (Return included in sequence)
4765 /// * Requires stack fixups? No
4766 ///
4767 /// \p MachineOutlinerNoLRSave implies that the function should be called using
4768 /// a BL instruction, but doesn't require LR to be saved and restored. This
4769 /// happens when LR is known to be dead.
4770 ///
4771 /// That is,
4772 ///
4773 /// I1 OUTLINED_FUNCTION:
4774 /// I2 --> BL OUTLINED_FUNCTION I1
4775 /// I3 I2
4776 /// I3
4777 /// RET
4778 ///
4779 /// * Call construction overhead: 1 (BL)
4780 /// * Frame construction overhead: 1 (RET)
4781 /// * Requires stack fixups? No
4782 ///
4783 /// \p MachineOutlinerThunk implies that the function is being created from
4784 /// a sequence of instructions ending in a call. The outlined function is
4785 /// called with a BL instruction, and the outlined function tail-calls the
4786 /// original call destination.
4787 ///
4788 /// That is,
4789 ///
4790 /// I1 OUTLINED_FUNCTION:
4791 /// I2 --> BL OUTLINED_FUNCTION I1
4792 /// BL f I2
4793 /// B f
4794 /// * Call construction overhead: 1 (BL)
4795 /// * Frame construction overhead: 0
4796 /// * Requires stack fixups? No
4797 ///
4798 /// \p MachineOutlinerRegSave implies that the function should be called with a
4799 /// save and restore of LR to an available register. This allows us to avoid
4800 /// stack fixups. Note that this outlining variant is compatible with the
4801 /// NoLRSave case.
4802 ///
4803 /// That is,
4804 ///
4805 /// I1 Save LR OUTLINED_FUNCTION:
4806 /// I2 --> BL OUTLINED_FUNCTION I1
4807 /// I3 Restore LR I2
4808 /// I3
4809 /// RET
4810 ///
4811 /// * Call construction overhead: 3 (save + BL + restore)
4812 /// * Frame construction overhead: 1 (ret)
4813 /// * Requires stack fixups? No
4814 enum MachineOutlinerClass {
4815  MachineOutlinerDefault, /// Emit a save, restore, call, and return.
4816  MachineOutlinerTailCall, /// Only emit a branch.
4817  MachineOutlinerNoLRSave, /// Emit a call and return.
4818  MachineOutlinerThunk, /// Emit a call and tail-call.
4819  MachineOutlinerRegSave /// Same as default, but save to a register.
4820 };
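// Worked size example using the overheads documented above (hypothetical
// numbers): outlining a 5-instruction (20-byte) sequence that occurs 4 times
// with MachineOutlinerDefault costs 4 calls * 12 B + 20 B body + 4 B ret
// = 72 B versus 4 * 20 B = 80 B inline, saving 8 B; with only 3 occurrences
// the same sequence merely breaks even (60 B vs. 60 B). This is the kind of
// trade-off getOutliningCandidateInfo below weighs when it compares call
// costs against SequenceSize.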
4821 
4822 enum MachineOutlinerMBBFlags {
4823  LRUnavailableSomewhere = 0x2,
4824  HasCalls = 0x4,
4825  UnsafeRegsDead = 0x8
4826 };
4827 
4828 unsigned
4829 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
4830  assert(C.LRUWasSet && "LRU wasn't set?");
4831  MachineFunction *MF = C.getMF();
4832  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
4833  MF->getSubtarget().getRegisterInfo());
4834 
4835  // Check if there is an available register across the sequence that we can
4836  // use.
4837  for (unsigned Reg : AArch64::GPR64RegClass) {
4838  if (!ARI->isReservedReg(*MF, Reg) &&
4839  Reg != AArch64::LR && // LR is not reserved, but don't use it.
4840  Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
4841  Reg != AArch64::X17 && // Ditto for X17.
4842  C.LRU.available(Reg))
4843  return Reg;
4844  }
4845 
4846  // No suitable register. Return 0.
4847  return 0u;
4848 }
4849 
4850 outliner::OutlinedFunction
4851 AArch64InstrInfo::getOutliningCandidateInfo(
4852  std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
4853  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
4854  unsigned SequenceSize =
4855  std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
4856  [this](unsigned Sum, const MachineInstr &MI) {
4857  return Sum + getInstSizeInBytes(MI);
4858  });
4859 
4860  // Properties about candidate MBBs that hold for all of them.
4861  unsigned FlagsSetInAll = 0xF;
4862 
4863  // Compute liveness information for each candidate, and set FlagsSetInAll.
4864  const TargetRegisterInfo &TRI = getRegisterInfo();
4865  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4866  [&FlagsSetInAll](outliner::Candidate &C) {
4867  FlagsSetInAll &= C.Flags;
4868  });
4869 
4870  // According to the AArch64 Procedure Call Standard, the following are
4871  // undefined on entry/exit from a function call:
4872  //
4873  // * Registers x16, x17, (and thus w16, w17)
4874  // * Condition codes (and thus the NZCV register)
4875  //
4876  // Because of this, we can't outline any sequence of instructions where
4877  // one of these registers is live into/across it. Thus, we need to delete
4878  // those candidates.
4881  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
4882  // If the unsafe registers in this block are all dead, then we don't need
4883  // to compute liveness here.
4884  if (C.Flags & UnsafeRegsDead)
4885  return false;
4886  C.initLRU(TRI);
4887  LiveRegUnits LRU = C.LRU;
4888  return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
4889  !LRU.available(AArch64::NZCV));
4890  };
4891 
4892  // Are there any candidates where those registers are live?
4893  if (!(FlagsSetInAll & UnsafeRegsDead)) {
4894  // Erase every candidate that violates the restrictions above. (It could be
4895  // true that we have viable candidates, so it's not worth bailing out in
4896  // the case that, say, 1 out of 20 candidates violates the restrictions.)
4897  RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
4898  RepeatedSequenceLocs.end(),
4899  CantGuaranteeValueAcrossCall),
4900  RepeatedSequenceLocs.end());
4901 
4902  // If the sequence doesn't have enough candidates left, then we're done.
4903  if (RepeatedSequenceLocs.size() < 2)
4904  return outliner::OutlinedFunction();
4905  }
4906 
4907  // At this point, we have only "safe" candidates to outline. Figure out
4908  // frame + call instruction information.
4909 
4910  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
4911 
4912  // Helper lambda which sets call information for every candidate.
4913  auto SetCandidateCallInfo =
4914  [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
4915  for (outliner::Candidate &C : RepeatedSequenceLocs)
4916  C.setCallInfo(CallID, NumBytesForCall);
4917  };
4918 
4919  unsigned FrameID = MachineOutlinerDefault;
4920  unsigned NumBytesToCreateFrame = 4;
4921 
4922  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
4923  return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
4924  });
4925 
4926  // Returns true if an instruction is safe to fix up, false otherwise.
4927  auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
4928  if (MI.isCall())
4929  return true;
4930 
4931  if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
4932  !MI.readsRegister(AArch64::SP, &TRI))
4933  return true;
4934 
4935  // Any modification of SP will break our code to save/restore LR.
4936  // FIXME: We could handle some instructions which add a constant
4937  // offset to SP, with a bit more work.
4938  if (MI.modifiesRegister(AArch64::SP, &TRI))
4939  return false;
4940 
4941  // At this point, we have a stack instruction that we might need to
4942  // fix up. We'll handle it if it's a load or store.
4943  if (MI.mayLoadOrStore()) {
4944  const MachineOperand *Base; // Filled with the base operand of MI.
4945  int64_t Offset; // Filled with the offset of MI.
4946 
4947  // Does it allow us to offset the base operand and is the base the
4948  // register SP?
4949  if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
4950  Base->getReg() != AArch64::SP)
4951  return false;
4952 
4953  // Find the minimum/maximum offset for this instruction and check
4954  // if fixing it up would be in range.
4955  int64_t MinOffset,
4956  MaxOffset; // Unscaled offsets for the instruction.
4957  unsigned Scale; // The scale to multiply the offsets by.
4958  unsigned DummyWidth;
4959  getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
4960 
4961  Offset += 16; // Update the offset to what it would be if we outlined.
4962  if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
4963  return false;
4964 
4965  // It's in range, so we can outline it.
4966  return true;
4967  }
4968 
4969  // FIXME: Add handling for instructions like "add x0, sp, #8".
4970 
4971  // We can't fix it up, so don't outline it.
4972  return false;
4973  };
4974 
4975  // True if it's possible to fix up each stack instruction in this sequence.
4976  // Important for frames/call variants that modify the stack.
4977  bool AllStackInstrsSafe = std::all_of(
4978  FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
4979 
4980  // If the last instruction in any candidate is a terminator, then we should
4981  // tail call all of the candidates.
4982  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
4983  FrameID = MachineOutlinerTailCall;
4984  NumBytesToCreateFrame = 0;
4985  SetCandidateCallInfo(MachineOutlinerTailCall, 4);
4986  }
4987 
4988  else if (LastInstrOpcode == AArch64::BL ||
4989  (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
4990  // FIXME: Do we need to check if the code after this uses the value of LR?
4991  FrameID = MachineOutlinerThunk;
4992  NumBytesToCreateFrame = 0;
4993  SetCandidateCallInfo(MachineOutlinerThunk, 4);
4994  }
4995 
4996  else {
4997  // We need to decide how to emit calls + frames. We can always emit the same
4998  // frame if we don't need to save to the stack. If we have to save to the
4999  // stack, then we need a different frame.
5000  unsigned NumBytesNoStackCalls = 0;
5001  std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5002 
5003  for (outliner::Candidate &C : RepeatedSequenceLocs) {
5004  C.initLRU(TRI);
5005 
5006  // Is LR available? If so, we don't need a save.
5007  if (C.LRU.available(AArch64::LR)) {
5008  NumBytesNoStackCalls += 4;
5010  CandidatesWithoutStackFixups.push_back(C);
5011  }
5012 
5013  // Is an unused register available? If so, we won't modify the stack, so
5014  // we can outline with the same frame type as those that don't save LR.
5015  else if (findRegisterToSaveLRTo(C)) {
5016  NumBytesNoStackCalls += 12;
5018  CandidatesWithoutStackFixups.push_back(C);
5019  }
5020 
5021  // Is SP used in the sequence at all? If not, we don't have to modify
5022  // the stack, so we are guaranteed to get the same frame.
5023  else if (C.UsedInSequence.available(AArch64::SP)) {
5024  NumBytesNoStackCalls += 12;
5026  CandidatesWithoutStackFixups.push_back(C);
5027  }
5028 
5029  // If we outline this, we need to modify the stack. Pretend we don't
5030  // outline this by saving all of its bytes.
5031  else {
5032  NumBytesNoStackCalls += SequenceSize;
5033  }
5034  }
5035 
5036  // If there are no places where we have to save LR, then note that we
5037  // don't have to update the stack. Otherwise, give every candidate the
5038  // default call type, as long as it's safe to do so.
5039  if (!AllStackInstrsSafe ||
5040  NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
5041  RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5042  FrameID = MachineOutlinerNoLRSave;
5043  } else {
5044  SetCandidateCallInfo(MachineOutlinerDefault, 12);
5045  }
5046 
5047  // If we dropped all of the candidates, bail out here.
5048  if (RepeatedSequenceLocs.size() < 2) {
5049  RepeatedSequenceLocs.clear();
5050  return outliner::OutlinedFunction();
5051  }
5052  }
5053 
5054  // Does every candidate's MBB contain a call? If so, then we might have a call
5055  // in the range.
5056  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5057  // Check if the range contains a call. These require a save + restore of the
5058  // link register.
5059  bool ModStackToSaveLR = false;
5060  if (std::any_of(FirstCand.front(), FirstCand.back(),
5061  [](const MachineInstr &MI) { return MI.isCall(); }))
5062  ModStackToSaveLR = true;
5063 
5064  // Handle the last instruction separately. If this is a tail call, then the
5065  // last instruction is a call. We don't want to save + restore in this case.
5066  // However, it could be possible that the last instruction is a call without
5067  // it being valid to tail call this sequence. We should consider this as
5068  // well.
5069  else if (FrameID != MachineOutlinerThunk &&
5070  FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
5071  ModStackToSaveLR = true;
5072 
5073  if (ModStackToSaveLR) {
5074  // We can't fix up the stack. Bail out.
5075  if (!AllStackInstrsSafe) {
5076  RepeatedSequenceLocs.clear();
5077  return outliner::OutlinedFunction();
5078  }
5079 
5080  // Save + restore LR.
5081  NumBytesToCreateFrame += 8;
5082  }
5083  }
5084 
5085  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5086  NumBytesToCreateFrame, FrameID);
5087 }
5088 
5089 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5090  MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5091  const Function &F = MF.getFunction();
5092 
5093  // Can F be deduplicated by the linker? If it can, don't outline from it.
5094  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5095  return false;
5096 
5097  // Don't outline from functions with section markings; the program could
5098  // expect that all the code is in the named section.
5099  // FIXME: Allow outlining from multiple functions with the same section
5100  // marking.
5101  if (F.hasSection())
5102  return false;
5103 
5104  // Outlining from functions with redzones is unsafe since the outliner may
5105  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5106  // outline from it.
5107  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5108  if (!AFI || AFI->hasRedZone().getValueOr(true))
5109  return false;
5110 
5111  // It's safe to outline from MF.
5112  return true;
5113 }
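// Illustration of the red-zone restriction above (hypothetical leaf function):
// a function with a red zone may spill below SP without adjusting it, e.g.
//   stur x19, [sp, #-8]
// while the outliner's default call sequence saves LR with
//   str  x30, [sp, #-16]!
// which would overwrite that slot; hence such functions are not outlined from.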
5114 
5115 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
5116  unsigned &Flags) const {
5117  // Check if LR is available through all of the MBB. If it's not, then set
5118  // a flag.
5120  "Suitable Machine Function for outlining must track liveness");
5122 
5123  std::for_each(MBB.rbegin(), MBB.rend(),
5124  [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
5125 
5126  // Check if each of the unsafe registers are available...
5127  bool W16AvailableInBlock = LRU.available(AArch64::W16);
5128  bool W17AvailableInBlock = LRU.available(AArch64::W17);
5129  bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
5130 
5131  // If all of these are dead (and not live out), we know we don't have to check
5132  // them later.
5133  if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
5135 
5136  // Now, add the live outs to the set.
5137  LRU.addLiveOuts(MBB);
5138 
5139  // If any of these registers is available in the MBB, but also a live out of
5140  // the block, then we know outlining is unsafe.
5141  if (W16AvailableInBlock && !LRU.available(AArch64::W16))
5142  return false;
5143  if (W17AvailableInBlock && !LRU.available(AArch64::W17))
5144  return false;
5145  if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
5146  return false;
5147 
5148  // Check if there's a call inside this MachineBasicBlock. If there is, then
5149  // set a flag.
5150  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
5152 
5153  MachineFunction *MF = MBB.getParent();
5154 
5155  // In the event that we outline, we may have to save LR. If there is an
5156  // available register in the MBB, then we'll always save LR there. Check if
5157  // this is true.
5158  bool CanSaveLR = false;
5159  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5160  MF->getSubtarget().getRegisterInfo());
5161 
5162  // Check if there is an available register across the sequence that we can
5163  // use.
5164  for (unsigned Reg : AArch64::GPR64RegClass) {
5165  if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
5166  Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
5167  CanSaveLR = true;
5168  break;
5169  }
5170  }
5171 
5172  // Check if we have a register we can save LR to, and if LR was used
5173  // somewhere. If both of those things are true, then we need to evaluate the
5174  // safety of outlining stack instructions later.
5175  if (!CanSaveLR && !LRU.available(AArch64::LR))
5177 
5178  return true;
5179 }
5180 
5181 outliner::InstrType
5182 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
5183  unsigned Flags) const {
5184  MachineInstr &MI = *MIT;
5185  MachineBasicBlock *MBB = MI.getParent();
5186  MachineFunction *MF = MBB->getParent();
5187  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
5188 
5189  // Don't outline LOHs.
5190  if (FuncInfo->getLOHRelated().count(&MI))
5192 
5193  // Don't allow debug values to impact outlining type.
5194  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
5196 
5197  // At this point, KILL instructions don't really tell us much so we can go
5198  // ahead and skip over them.
5199  if (MI.isKill())
5201 
5202  // Is this a terminator for a basic block?
5203  if (MI.isTerminator()) {
5204 
5205  // Is this the end of a function?
5206  if (MI.getParent()->succ_empty())
5208 
5209  // It's not, so don't outline it.
5211  }
5212 
5213  // Make sure none of the operands are un-outlinable.
5214  for (const MachineOperand &MOP : MI.operands()) {
5215  if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
5216  MOP.isTargetIndex())
5218 
5219  // If it uses LR or W30 explicitly, then don't touch it.
5220  if (MOP.isReg() && !MOP.isImplicit() &&
5221  (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
5223  }
5224 
5225  // Special cases for instructions that can always be outlined, but will fail
5226  // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
5227  // be outlined because they don't require a *specific* value to be in LR.
5228  if (MI.getOpcode() == AArch64::ADRP)
5230 
5231  // If MI is a call we might be able to outline it. We don't want to outline
5232  // any calls that rely on the position of items on the stack. When we outline
5233  // something containing a call, we have to emit a save and restore of LR in
5234  // the outlined function. Currently, this always happens by saving LR to the
5235  // stack. Thus, if we outline, say, half the parameters for a function call
5236  // plus the call, then we'll break the callee's expectations for the layout
5237  // of the stack.
5238  //
5239  // FIXME: Allow calls to functions which construct a stack frame, as long
5240  // as they don't access arguments on the stack.
5241  // FIXME: Figure out some way to analyze functions defined in other modules.
5242  // We should be able to compute the memory usage based on the IR calling
5243  // convention, even if we can't see the definition.
5244  if (MI.isCall()) {
5245  // Get the function associated with the call. Look at each operand and find
5246  // the one that represents the callee and get its name.
5247  const Function *Callee = nullptr;
5248  for (const MachineOperand &MOP : MI.operands()) {
5249  if (MOP.isGlobal()) {
5250  Callee = dyn_cast<Function>(MOP.getGlobal());
5251  break;
5252  }
5253  }
5254 
5255  // Never outline calls to mcount. There isn't any rule that would require
5256  // this, but the Linux kernel's "ftrace" feature depends on it.
5257  if (Callee && Callee->getName() == "\01_mcount")
5259 
5260  // If we don't know anything about the callee, assume it depends on the
5261  // stack layout of the caller. In that case, it's only legal to outline
5262  // as a tail-call. Whitelist the call instructions we know about so we
5263  // don't get unexpected results with call pseudo-instructions.
5264  auto UnknownCallOutlineType = outliner::InstrType::Illegal;
5265  if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
5266  UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
5267 
5268  if (!Callee)
5269  return UnknownCallOutlineType;
5270 
5271  // We have a function we have information about. Check if it's something we
5272  // can safely outline.
5273  MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
5274 
5275  // We don't know what's going on with the callee at all. Don't touch it.
5276  if (!CalleeMF)
5277  return UnknownCallOutlineType;
5278 
5279  // Check if we know anything about the callee saves on the function. If we
5280  // don't, then don't touch it, since that implies that we haven't
5281  // computed anything about its stack frame yet.
5282  MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
5283  if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
5284  MFI.getNumObjects() > 0)
5285  return UnknownCallOutlineType;
5286 
5287  // At this point, we can say that CalleeMF ought to not pass anything on the
5288  // stack. Therefore, we can outline it.
5290  }
5291 
5292  // Don't outline positions.
5293  if (MI.isPosition())
5295 
5296  // Don't touch the link register or W30.
5297  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
5298  MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
5300 
5301  // Don't outline BTI instructions, because that will prevent the outlining
5302  // site from being indirectly callable.
5303  if (MI.getOpcode() == AArch64::HINT) {
5304  int64_t Imm = MI.getOperand(0).getImm();
5305  if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
5307  }
5308 
5310 }
5311 
5312 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5313  for (MachineInstr &MI : MBB) {
5314  const MachineOperand *Base;
5315  unsigned Width;
5316  int64_t Offset;
5317 
5318  // Is this a load or store with an immediate offset with SP as the base?
5319  if (!MI.mayLoadOrStore() ||
5320  !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
5321  (Base->isReg() && Base->getReg() != AArch64::SP))
5322  continue;
5323 
5324  // It is, so we have to fix it up.
5325  unsigned Scale;
5326  int64_t Dummy1, Dummy2;
5327 
5328  MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5329  assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5330  getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5331  assert(Scale != 0 && "Unexpected opcode!");
5332 
5333  // We've pushed the return address to the stack, so add 16 to the offset.
5334  // This is safe, since we already checked if it would overflow when we
5335  // checked if this instruction was legal to outline.
5336  int64_t NewImm = (Offset + 16) / Scale;
5337  StackOffsetOperand.setImm(NewImm);
5338  }
5339 }
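// Worked example of the fixup above (hypothetical instruction): an
//   ldr x0, [sp, #8]
// in the outlined body has Offset = 8 and Scale = 8 (LDRXui), so after the
// 16-byte LR spill its immediate becomes (8 + 16) / 8 = 3, i.e.
//   ldr x0, [sp, #24]
// which matches the stack layout once the save emitted by buildOutlinedFrame
// is in place.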
5340 
5341 void AArch64InstrInfo::buildOutlinedFrame(
5342  MachineBasicBlock &MBB, MachineFunction &MF,
5343  const outliner::OutlinedFunction &OF) const {
5344  // For thunk outlining, rewrite the last instruction from a call to a
5345  // tail-call.
5347  MachineInstr *Call = &*--MBB.instr_end();
5348  unsigned TailOpcode;
5349  if (Call->getOpcode() == AArch64::BL) {
5350  TailOpcode = AArch64::TCRETURNdi;
5351  } else {
5352  assert(Call->getOpcode() == AArch64::BLR);
5353  TailOpcode = AArch64::TCRETURNriALL;
5354  }
5355  MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
5356  .add(Call->getOperand(0))
5357  .addImm(0);
5358  MBB.insert(MBB.end(), TC);
5359  Call->eraseFromParent();
5360  }
5361 
5362  // Is there a call in the outlined range?
5363  auto IsNonTailCall = [](MachineInstr &MI) {
5364  return MI.isCall() && !MI.isReturn();
5365  };
5366  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
5367  // Fix up the instructions in the range, since we're going to modify the
5368  // stack.
5370  "Can only fix up stack references once");
5371  fixupPostOutline(MBB);
5372 
5373  // LR has to be a live in so that we can save it.
5374  MBB.addLiveIn(AArch64::LR);
5375 
5377  MachineBasicBlock::iterator Et = MBB.end();
5378 
5381  Et = std::prev(MBB.end());
5382 
5383  // Insert a save before the outlined region
5384  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5385  .addReg(AArch64::SP, RegState::Define)
5386  .addReg(AArch64::LR)
5387  .addReg(AArch64::SP)
5388  .addImm(-16);
5389  It = MBB.insert(It, STRXpre);
5390 
5391  const TargetSubtargetInfo &STI = MF.getSubtarget();
5392  const MCRegisterInfo *MRI = STI.getRegisterInfo();
5393  unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
5394 
5395  // Add a CFI saying the stack was moved 16 B down.
5396  int64_t StackPosEntry =
5398  BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5399  .addCFIIndex(StackPosEntry)
5401 
5402  // Add a CFI saying that the LR that we want to find is now 16 B higher than
5403  // before.
5404  int64_t LRPosEntry =
5405  MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
5406  BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5407  .addCFIIndex(LRPosEntry)
5409 
5410  // Insert a restore before the terminator for the function.
5411  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5412  .addReg(AArch64::SP, RegState::Define)
5413  .addReg(AArch64::LR, RegState::Define)
5414  .addReg(AArch64::SP)
5415  .addImm(16);
5416  Et = MBB.insert(Et, LDRXpost);
5417  }
5418 
5419  // If this is a tail call outlined function, then there's already a return.
5422  return;
5423 
5424  // It's not a tail call, so we have to insert the return ourselves.
5425  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5426  .addReg(AArch64::LR, RegState::Undef);
5427  MBB.insert(MBB.end(), ret);
5428 
5429  // Did we have to modify the stack by saving the link register?
5431  return;
5432 
5433  // We modified the stack.
5434  // Walk over the basic block and fix up all the stack accesses.
5435  fixupPostOutline(MBB);
5436 }
5437 
5438 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5439  Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
5440  MachineFunction &MF, const outliner::Candidate &C) const {
5441 
5442  // Are we tail calling?
5444  // If yes, then we can just branch to the label.
5445  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
5446  .addGlobalAddress(M.getNamedValue(MF.getName()))
5447  .addImm(0));
5448  return It;
5449  }
5450 
5451  // Are we saving the link register?
5454  // No, so just insert the call.
5455  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5456  .addGlobalAddress(M.getNamedValue(MF.getName())));
5457  return It;
5458  }
5459 
5460  // We want to return the spot where we inserted the call.
5462 
5463  // Instructions for saving and restoring LR around the call instruction we're
5464  // going to insert.
5465  MachineInstr *Save;
5466  MachineInstr *Restore;
5467  // Can we save to a register?
5469  // FIXME: This logic should be sunk into a target-specific interface so that
5470  // we don't have to recompute the register.
5471  unsigned Reg = findRegisterToSaveLRTo(C);
5472  assert(Reg != 0 && "No callee-saved register available?");
5473 
5474  // Save and restore LR from that register.
5475  Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
5476  .addReg(AArch64::XZR)
5477  .addReg(AArch64::LR)
5478  .addImm(0);
5479  Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
5480  .addReg(AArch64::XZR)
5481  .addReg(Reg)
5482  .addImm(0);
5483  } else {
5484  // We have the default case. Save and restore from SP.
5485  Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5486  .addReg(AArch64::SP, RegState::Define)
5487  .addReg(AArch64::LR)
5488  .addReg(AArch64::SP)
5489  .addImm(-16);
5490  Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5491  .addReg(AArch64::SP, RegState::Define)
5492  .addReg(AArch64::LR, RegState::Define)
5493  .addReg(AArch64::SP)