LLVM  15.0.0git
AArch64InstrInfo.cpp
1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
15 #include "AArch64Subtarget.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/CodeGen/StackMaps.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCAsmInfo.h"
37 #include "llvm/MC/MCInst.h"
38 #include "llvm/MC/MCInstBuilder.h"
39 #include "llvm/MC/MCInstrDesc.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/CodeGen.h"
43 #include "llvm/Support/Compiler.h"
45 #include "llvm/Support/LEB128.h"
49 #include <cassert>
50 #include <cstdint>
51 #include <iterator>
52 #include <utility>
53 
54 using namespace llvm;
55 
56 #define GET_INSTRINFO_CTOR_DTOR
57 #include "AArch64GenInstrInfo.inc"
58 
60  "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
61  cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
62 
64  "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
65  cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
66 
67 static cl::opt<unsigned>
68  BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
69  cl::desc("Restrict range of Bcc instructions (DEBUG)"));
70 
71 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
72  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
73  AArch64::CATCHRET),
74  RI(STI.getTargetTriple()), Subtarget(STI) {}
75 
76 /// getInstSizeInBytes - Return the number of bytes of code the specified
77 /// instruction may be. This returns the maximum number of bytes.
78 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
79  const MachineBasicBlock &MBB = *MI.getParent();
80  const MachineFunction *MF = MBB.getParent();
81  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
82 
83  {
84  auto Op = MI.getOpcode();
85  if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
86  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
87  }
88 
89  // Meta-instructions emit no code.
90  if (MI.isMetaInstruction())
91  return 0;
92 
93  // FIXME: We currently only handle pseudoinstructions that don't get expanded
94  // before the assembly printer.
95  unsigned NumBytes = 0;
96  const MCInstrDesc &Desc = MI.getDesc();
97 
98  // The size should preferably be set in
99  // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
100  // Specific cases handle instructions of variable sizes
101  switch (Desc.getOpcode()) {
102  default:
103  if (Desc.getSize())
104  return Desc.getSize();
105 
106  // Anything not explicitly designated otherwise (i.e. pseudo-instructions
107  // with fixed constant size but not specified in .td file) is a normal
108  // 4-byte insn.
109  NumBytes = 4;
110  break;
111  case TargetOpcode::STACKMAP:
112  // The upper bound for a stackmap intrinsic is the full length of its shadow
113  NumBytes = StackMapOpers(&MI).getNumPatchBytes();
114  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
115  break;
116  case TargetOpcode::PATCHPOINT:
117  // The size of the patchpoint intrinsic is the number of bytes requested
118  NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
119  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
120  break;
121  case TargetOpcode::STATEPOINT:
122  NumBytes = StatepointOpers(&MI).getNumPatchBytes();
123  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
124  // No patch bytes means a normal call inst is emitted
125  if (NumBytes == 0)
126  NumBytes = 4;
127  break;
128  case AArch64::SPACE:
129  NumBytes = MI.getOperand(1).getImm();
130  break;
131  case TargetOpcode::BUNDLE:
132  NumBytes = getInstBundleLength(MI);
133  break;
134  }
135 
136  return NumBytes;
137 }
138 
139 unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
140  unsigned Size = 0;
141  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
142  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
143  while (++I != E && I->isInsideBundle()) {
144  assert(!I->isBundle() && "No nested bundle!");
145  Size += getInstSizeInBytes(*I);
146  }
147  return Size;
148 }
149 
150 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
151  SmallVectorImpl<MachineOperand> &Cond) {
152  // Block ends with fall-through condbranch.
153  switch (LastInst->getOpcode()) {
154  default:
155  llvm_unreachable("Unknown branch instruction?");
156  case AArch64::Bcc:
157  Target = LastInst->getOperand(1).getMBB();
158  Cond.push_back(LastInst->getOperand(0));
159  break;
160  case AArch64::CBZW:
161  case AArch64::CBZX:
162  case AArch64::CBNZW:
163  case AArch64::CBNZX:
164  Target = LastInst->getOperand(1).getMBB();
165  Cond.push_back(MachineOperand::CreateImm(-1));
166  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
167  Cond.push_back(LastInst->getOperand(0));
168  break;
169  case AArch64::TBZW:
170  case AArch64::TBZX:
171  case AArch64::TBNZW:
172  case AArch64::TBNZX:
173  Target = LastInst->getOperand(2).getMBB();
174  Cond.push_back(MachineOperand::CreateImm(-1));
175  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
176  Cond.push_back(LastInst->getOperand(0));
177  Cond.push_back(LastInst->getOperand(1));
178  }
179 }
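// Illustrative note (not from the original source): the Cond vector built by
// parseCondBranch() above uses a small ad-hoc encoding that the rest of this
// file decodes by Cond.size(). A sketch, with made-up virtual registers:
//
//   Bcc   eq, %bb.1        ->  Cond = { eq }                 (size 1)
//   CBNZX %x0, %bb.1       ->  Cond = { -1, CBNZX, %x0 }     (size 3)
//   TBZW  %w0, 3, %bb.1    ->  Cond = { -1, TBZW, %w0, 3 }   (size 4)
//
// insertBranch() and insertSelect() below consume exactly this encoding.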
180 
181 static unsigned getBranchDisplacementBits(unsigned Opc) {
182  switch (Opc) {
183  default:
184  llvm_unreachable("unexpected opcode!");
185  case AArch64::B:
186  return 64;
187  case AArch64::TBNZW:
188  case AArch64::TBZW:
189  case AArch64::TBNZX:
190  case AArch64::TBZX:
191  return TBZDisplacementBits;
192  case AArch64::CBNZW:
193  case AArch64::CBZW:
194  case AArch64::CBNZX:
195  case AArch64::CBZX:
196  return CBZDisplacementBits;
197  case AArch64::Bcc:
198  return BCCDisplacementBits;
199  }
200 }
201 
202 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
203  int64_t BrOffset) const {
204  unsigned Bits = getBranchDisplacementBits(BranchOp);
205  assert(Bits >= 3 && "max branch displacement must be enough to jump"
206  "over conditional branch expansion");
207  return isIntN(Bits, BrOffset / 4);
208 }
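// Illustrative note (not from the original source): a worked example of the
// range check above. TB[N]Z encodes a signed offset in units of 4-byte
// instructions, so with the default TBZDisplacementBits = 14 the reachable
// range is +/- 2^13 * 4 = +/- 32768 bytes; isIntN(14, BrOffset / 4) is exactly
// that test. CB[N]Z and Bcc use 19 bits, i.e. roughly +/- 1 MiB.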
209 
210 MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
211  const MachineInstr &MI) const {
212  switch (MI.getOpcode()) {
213  default:
214  llvm_unreachable("unexpected opcode!");
215  case AArch64::B:
216  return MI.getOperand(0).getMBB();
217  case AArch64::TBZW:
218  case AArch64::TBNZW:
219  case AArch64::TBZX:
220  case AArch64::TBNZX:
221  return MI.getOperand(2).getMBB();
222  case AArch64::CBZW:
223  case AArch64::CBNZW:
224  case AArch64::CBZX:
225  case AArch64::CBNZX:
226  case AArch64::Bcc:
227  return MI.getOperand(1).getMBB();
228  }
229 }
230 
231 // Branch analysis.
232 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
233  MachineBasicBlock *&TBB,
234  MachineBasicBlock *&FBB,
235  SmallVectorImpl<MachineOperand> &Cond,
236  bool AllowModify) const {
237  // If the block has no terminators, it just falls into the block after it.
238  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
239  if (I == MBB.end())
240  return false;
241 
242  // Skip over SpeculationBarrierEndBB terminators
243  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
244  I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
245  --I;
246  }
247 
248  if (!isUnpredicatedTerminator(*I))
249  return false;
250 
251  // Get the last instruction in the block.
252  MachineInstr *LastInst = &*I;
253 
254  // If there is only one terminator instruction, process it.
255  unsigned LastOpc = LastInst->getOpcode();
256  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
257  if (isUncondBranchOpcode(LastOpc)) {
258  TBB = LastInst->getOperand(0).getMBB();
259  return false;
260  }
261  if (isCondBranchOpcode(LastOpc)) {
262  // Block ends with fall-through condbranch.
263  parseCondBranch(LastInst, TBB, Cond);
264  return false;
265  }
266  return true; // Can't handle indirect branch.
267  }
268 
269  // Get the instruction before it if it is a terminator.
270  MachineInstr *SecondLastInst = &*I;
271  unsigned SecondLastOpc = SecondLastInst->getOpcode();
272 
273  // If AllowModify is true and the block ends with two or more unconditional
274  // branches, delete all but the first unconditional branch.
275  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
276  while (isUncondBranchOpcode(SecondLastOpc)) {
277  LastInst->eraseFromParent();
278  LastInst = SecondLastInst;
279  LastOpc = LastInst->getOpcode();
280  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
281  // Return now; the only terminator is an unconditional branch.
282  TBB = LastInst->getOperand(0).getMBB();
283  return false;
284  } else {
285  SecondLastInst = &*I;
286  SecondLastOpc = SecondLastInst->getOpcode();
287  }
288  }
289  }
290 
291  // If we're allowed to modify and the block ends in an unconditional branch
292  // which could simply fallthrough, remove the branch. (Note: This case only
293  // matters when we can't understand the whole sequence, otherwise it's also
294  // handled by BranchFolding.cpp.)
295  if (AllowModify && isUncondBranchOpcode(LastOpc) &&
296  MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
297  LastInst->eraseFromParent();
298  LastInst = SecondLastInst;
299  LastOpc = LastInst->getOpcode();
300  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
301  assert(!isUncondBranchOpcode(LastOpc) &&
302  "unreachable unconditional branches removed above");
303 
304  if (isCondBranchOpcode(LastOpc)) {
305  // Block ends with fall-through condbranch.
306  parseCondBranch(LastInst, TBB, Cond);
307  return false;
308  }
309  return true; // Can't handle indirect branch.
310  } else {
311  SecondLastInst = &*I;
312  SecondLastOpc = SecondLastInst->getOpcode();
313  }
314  }
315 
316  // If there are three terminators, we don't know what sort of block this is.
317  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
318  return true;
319 
320  // If the block ends with a B and a Bcc, handle it.
321  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
322  parseCondBranch(SecondLastInst, TBB, Cond);
323  FBB = LastInst->getOperand(0).getMBB();
324  return false;
325  }
326 
327  // If the block ends with two unconditional branches, handle it. The second
328  // one is not executed, so remove it.
329  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
330  TBB = SecondLastInst->getOperand(0).getMBB();
331  I = LastInst;
332  if (AllowModify)
333  I->eraseFromParent();
334  return false;
335  }
336 
337  // ...likewise if it ends with an indirect branch followed by an unconditional
338  // branch.
339  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
340  I = LastInst;
341  if (AllowModify)
342  I->eraseFromParent();
343  return true;
344  }
345 
346  // Otherwise, can't handle this.
347  return true;
348 }
349 
350 bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
351  MachineBranchPredicate &MBP,
352  bool AllowModify) const {
353  // For the moment, handle only a block which ends with a cb(n)zx followed by
354  // a fallthrough. Why this? Because it is a common form.
355  // TODO: Should we handle b.cc?
356 
357  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
358  if (I == MBB.end())
359  return true;
360 
361  // Skip over SpeculationBarrierEndBB terminators
362  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
363  I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
364  --I;
365  }
366 
367  if (!isUnpredicatedTerminator(*I))
368  return true;
369 
370  // Get the last instruction in the block.
371  MachineInstr *LastInst = &*I;
372  unsigned LastOpc = LastInst->getOpcode();
373  if (!isCondBranchOpcode(LastOpc))
374  return true;
375 
376  switch (LastOpc) {
377  default:
378  return true;
379  case AArch64::CBZW:
380  case AArch64::CBZX:
381  case AArch64::CBNZW:
382  case AArch64::CBNZX:
383  break;
384  };
385 
386  MBP.TrueDest = LastInst->getOperand(1).getMBB();
387  assert(MBP.TrueDest && "expected!");
388  MBP.FalseDest = MBB.getNextNode();
389 
390  MBP.ConditionDef = nullptr;
391  MBP.SingleUseCondition = false;
392 
393  MBP.LHS = LastInst->getOperand(0);
394  MBP.RHS = MachineOperand::CreateImm(0);
395  MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
396  : MachineBranchPredicate::PRED_EQ;
397  return false;
398 }
399 
400 bool AArch64InstrInfo::reverseBranchCondition(
401  SmallVectorImpl<MachineOperand> &Cond) const {
402  if (Cond[0].getImm() != -1) {
403  // Regular Bcc
404  AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
405  Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
406  } else {
407  // Folded compare-and-branch
408  switch (Cond[1].getImm()) {
409  default:
410  llvm_unreachable("Unknown conditional branch!");
411  case AArch64::CBZW:
412  Cond[1].setImm(AArch64::CBNZW);
413  break;
414  case AArch64::CBNZW:
415  Cond[1].setImm(AArch64::CBZW);
416  break;
417  case AArch64::CBZX:
418  Cond[1].setImm(AArch64::CBNZX);
419  break;
420  case AArch64::CBNZX:
421  Cond[1].setImm(AArch64::CBZX);
422  break;
423  case AArch64::TBZW:
424  Cond[1].setImm(AArch64::TBNZW);
425  break;
426  case AArch64::TBNZW:
427  Cond[1].setImm(AArch64::TBZW);
428  break;
429  case AArch64::TBZX:
430  Cond[1].setImm(AArch64::TBNZX);
431  break;
432  case AArch64::TBNZX:
433  Cond[1].setImm(AArch64::TBZX);
434  break;
435  }
436  }
437 
438  return false;
439 }
440 
441 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
442  int *BytesRemoved) const {
443  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
444  if (I == MBB.end())
445  return 0;
446 
447  if (!isUncondBranchOpcode(I->getOpcode()) &&
448  !isCondBranchOpcode(I->getOpcode()))
449  return 0;
450 
451  // Remove the branch.
452  I->eraseFromParent();
453 
454  I = MBB.end();
455 
456  if (I == MBB.begin()) {
457  if (BytesRemoved)
458  *BytesRemoved = 4;
459  return 1;
460  }
461  --I;
462  if (!isCondBranchOpcode(I->getOpcode())) {
463  if (BytesRemoved)
464  *BytesRemoved = 4;
465  return 1;
466  }
467 
468  // Remove the branch.
469  I->eraseFromParent();
470  if (BytesRemoved)
471  *BytesRemoved = 8;
472 
473  return 2;
474 }
475 
476 void AArch64InstrInfo::instantiateCondBranch(
477  MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
478  ArrayRef<MachineOperand> Cond) const {
479  if (Cond[0].getImm() != -1) {
480  // Regular Bcc
481  BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
482  } else {
483  // Folded compare-and-branch
484  // Note that we use .add() instead of addReg to keep the flags.
485  const MachineInstrBuilder MIB =
486  BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
487  if (Cond.size() > 3)
488  MIB.addImm(Cond[3].getImm());
489  MIB.addMBB(TBB);
490  }
491 }
492 
493 unsigned AArch64InstrInfo::insertBranch(
494  MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
495  ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
496  // Shouldn't be a fall through.
497  assert(TBB && "insertBranch must not be told to insert a fallthrough");
498 
499  if (!FBB) {
500  if (Cond.empty()) // Unconditional branch?
501  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
502  else
503  instantiateCondBranch(MBB, DL, TBB, Cond);
504 
505  if (BytesAdded)
506  *BytesAdded = 4;
507 
508  return 1;
509  }
510 
511  // Two-way conditional branch.
512  instantiateCondBranch(MBB, DL, TBB, Cond);
513  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
514 
515  if (BytesAdded)
516  *BytesAdded = 8;
517 
518  return 2;
519 }
520 
521 // Find the original register that VReg is copied from.
522 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
523  while (Register::isVirtualRegister(VReg)) {
524  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
525  if (!DefMI->isFullCopy())
526  return VReg;
527  VReg = DefMI->getOperand(1).getReg();
528  }
529  return VReg;
530 }
531 
532 // Determine if VReg is defined by an instruction that can be folded into a
533 // csel instruction. If so, return the folded opcode, and the replacement
534 // register.
535 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
536  unsigned *NewVReg = nullptr) {
537  VReg = removeCopies(MRI, VReg);
538  if (!Register::isVirtualRegister(VReg))
539  return 0;
540 
541  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
542  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
543  unsigned Opc = 0;
544  unsigned SrcOpNum = 0;
545  switch (DefMI->getOpcode()) {
546  case AArch64::ADDSXri:
547  case AArch64::ADDSWri:
548  // if NZCV is used, do not fold.
549  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
550  return 0;
551  // fall-through to ADDXri and ADDWri.
552  LLVM_FALLTHROUGH;
553  case AArch64::ADDXri:
554  case AArch64::ADDWri:
555  // add x, 1 -> csinc.
556  if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
557  DefMI->getOperand(3).getImm() != 0)
558  return 0;
559  SrcOpNum = 1;
560  Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
561  break;
562 
563  case AArch64::ORNXrr:
564  case AArch64::ORNWrr: {
565  // not x -> csinv, represented as orn dst, xzr, src.
566  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
567  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
568  return 0;
569  SrcOpNum = 2;
570  Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
571  break;
572  }
573 
574  case AArch64::SUBSXrr:
575  case AArch64::SUBSWrr:
576  // if NZCV is used, do not fold.
577  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
578  return 0;
579  // fall-through to SUBXrr and SUBWrr.
580  LLVM_FALLTHROUGH;
581  case AArch64::SUBXrr:
582  case AArch64::SUBWrr: {
583  // neg x -> csneg, represented as sub dst, xzr, src.
584  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
585  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
586  return 0;
587  SrcOpNum = 2;
588  Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
589  break;
590  }
591  default:
592  return 0;
593  }
594  assert(Opc && SrcOpNum && "Missing parameters");
595 
596  if (NewVReg)
597  *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
598  return Opc;
599 }
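// Illustrative note (not from the original source): a sketch of the folds
// recognized by canFoldIntoCSel() above, with made-up virtual registers:
//
//   %t = ADDWri %a, 1, 0     ; add %a, #1  ->  CSINCWr, *NewVReg = %a
//   %t = ORNWrr %wzr, %a     ; not %a      ->  CSINVWr, *NewVReg = %a
//   %t = SUBWrr %wzr, %a     ; neg %a      ->  CSNEGWr, *NewVReg = %a
//
// insertSelect() below then emits e.g. CSINCWr instead of a plain CSELWr and
// leaves the now-dead ADD/ORN/SUB for DCE to clean up.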
600 
601 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
602  ArrayRef<MachineOperand> Cond,
603  Register DstReg, Register TrueReg,
604  Register FalseReg, int &CondCycles,
605  int &TrueCycles,
606  int &FalseCycles) const {
607  // Check register classes.
608  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
609  const TargetRegisterClass *RC =
610  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
611  if (!RC)
612  return false;
613 
614  // Also need to check the dest regclass, in case we're trying to optimize
615  // something like:
616  // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
617  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
618  return false;
619 
620  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
621  unsigned ExtraCondLat = Cond.size() != 1;
622 
623  // GPRs are handled by csel.
624  // FIXME: Fold in x+1, -x, and ~x when applicable.
625  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
626  AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
627  // Single-cycle csel, csinc, csinv, and csneg.
628  CondCycles = 1 + ExtraCondLat;
629  TrueCycles = FalseCycles = 1;
630  if (canFoldIntoCSel(MRI, TrueReg))
631  TrueCycles = 0;
632  else if (canFoldIntoCSel(MRI, FalseReg))
633  FalseCycles = 0;
634  return true;
635  }
636 
637  // Scalar floating point is handled by fcsel.
638  // FIXME: Form fabs, fmin, and fmax when applicable.
639  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
640  AArch64::FPR32RegClass.hasSubClassEq(RC)) {
641  CondCycles = 5 + ExtraCondLat;
642  TrueCycles = FalseCycles = 2;
643  return true;
644  }
645 
646  // Can't do vectors.
647  return false;
648 }
649 
650 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
651  MachineBasicBlock::iterator I,
652  const DebugLoc &DL, Register DstReg,
653  ArrayRef<MachineOperand> Cond,
654  Register TrueReg, Register FalseReg) const {
655  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
656 
657  // Parse the condition code, see parseCondBranch() above.
658  AArch64CC::CondCode CC;
659  switch (Cond.size()) {
660  default:
661  llvm_unreachable("Unknown condition opcode in Cond");
662  case 1: // b.cc
663  CC = AArch64CC::CondCode(Cond[0].getImm());
664  break;
665  case 3: { // cbz/cbnz
666  // We must insert a compare against 0.
667  bool Is64Bit;
668  switch (Cond[1].getImm()) {
669  default:
670  llvm_unreachable("Unknown branch opcode in Cond");
671  case AArch64::CBZW:
672  Is64Bit = false;
673  CC = AArch64CC::EQ;
674  break;
675  case AArch64::CBZX:
676  Is64Bit = true;
677  CC = AArch64CC::EQ;
678  break;
679  case AArch64::CBNZW:
680  Is64Bit = false;
681  CC = AArch64CC::NE;
682  break;
683  case AArch64::CBNZX:
684  Is64Bit = true;
685  CC = AArch64CC::NE;
686  break;
687  }
688  Register SrcReg = Cond[2].getReg();
689  if (Is64Bit) {
690  // cmp reg, #0 is actually subs xzr, reg, #0.
691  MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
692  BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
693  .addReg(SrcReg)
694  .addImm(0)
695  .addImm(0);
696  } else {
697  MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
698  BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
699  .addReg(SrcReg)
700  .addImm(0)
701  .addImm(0);
702  }
703  break;
704  }
705  case 4: { // tbz/tbnz
706  // We must insert a tst instruction.
707  switch (Cond[1].getImm()) {
708  default:
709  llvm_unreachable("Unknown branch opcode in Cond");
710  case AArch64::TBZW:
711  case AArch64::TBZX:
712  CC = AArch64CC::EQ;
713  break;
714  case AArch64::TBNZW:
715  case AArch64::TBNZX:
716  CC = AArch64CC::NE;
717  break;
718  }
719  // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
720  if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
721  BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
722  .addReg(Cond[2].getReg())
723  .addImm(
724  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
725  else
726  BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
727  .addReg(Cond[2].getReg())
728  .addImm(
729  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
730  break;
731  }
732  }
733 
734  unsigned Opc = 0;
735  const TargetRegisterClass *RC = nullptr;
736  bool TryFold = false;
737  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
738  RC = &AArch64::GPR64RegClass;
739  Opc = AArch64::CSELXr;
740  TryFold = true;
741  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
742  RC = &AArch64::GPR32RegClass;
743  Opc = AArch64::CSELWr;
744  TryFold = true;
745  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
746  RC = &AArch64::FPR64RegClass;
747  Opc = AArch64::FCSELDrrr;
748  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
749  RC = &AArch64::FPR32RegClass;
750  Opc = AArch64::FCSELSrrr;
751  }
752  assert(RC && "Unsupported regclass");
753 
754  // Try folding simple instructions into the csel.
755  if (TryFold) {
756  unsigned NewVReg = 0;
757  unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
758  if (FoldedOpc) {
759  // The folded opcodes csinc, csinv, and csneg apply the operation to
760  // FalseReg, so we need to invert the condition.
761  CC = AArch64CC::getInvertedCondCode(CC);
762  TrueReg = FalseReg;
763  } else
764  FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
765 
766  // Fold the operation. Leave any dead instructions for DCE to clean up.
767  if (FoldedOpc) {
768  FalseReg = NewVReg;
769  Opc = FoldedOpc;
770  // This extends the live range of NewVReg.
771  MRI.clearKillFlags(NewVReg);
772  }
773  }
774 
775  // Pull all virtual registers into the appropriate class.
776  MRI.constrainRegClass(TrueReg, RC);
777  MRI.constrainRegClass(FalseReg, RC);
778 
779  // Insert the csel.
780  BuildMI(MBB, I, DL, get(Opc), DstReg)
781  .addReg(TrueReg)
782  .addReg(FalseReg)
783  .addImm(CC);
784 }
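// Illustrative note (not from the original source): for a cbz/cbnz-style
// condition (Cond.size() == 3) on a 32-bit register, the code above first
// materializes the flags and then emits the select, roughly:
//
//   $wzr = SUBSWri %src, 0, 0            ; cmp  wN, #0
//   %dst = CSELWr %true, %false, eq      ; (ne for the CBNZ forms)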
785 
786 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
787 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
788  uint64_t Imm = MI.getOperand(1).getImm();
789  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
790  uint64_t Encoding;
791  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
792 }
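// Illustrative note (not from the original source): processLogicalImmediate()
// succeeds only for constants expressible as an AArch64 logical immediate
// (a rotated, replicated bit pattern). For example, MOVi32imm 0x00ff00ff can
// be turned into ORRWri wN, wzr, #0x00ff00ff and is therefore as cheap as a
// move, whereas an arbitrary constant such as 0x12345678 is not encodable and
// still needs a MOVZ/MOVK sequence.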
793 
794 // FIXME: this implementation should be micro-architecture dependent, so a
795 // micro-architecture target hook should be introduced here in future.
796 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
797  if (!Subtarget.hasCustomCheapAsMoveHandling())
798  return MI.isAsCheapAsAMove();
799 
800  const unsigned Opcode = MI.getOpcode();
801 
802  // Firstly, check cases gated by features.
803 
804  if (Subtarget.hasZeroCycleZeroingFP()) {
805  if (Opcode == AArch64::FMOVH0 ||
806  Opcode == AArch64::FMOVS0 ||
807  Opcode == AArch64::FMOVD0)
808  return true;
809  }
810 
811  if (Subtarget.hasZeroCycleZeroingGP()) {
812  if (Opcode == TargetOpcode::COPY &&
813  (MI.getOperand(1).getReg() == AArch64::WZR ||
814  MI.getOperand(1).getReg() == AArch64::XZR))
815  return true;
816  }
817 
818  // Secondly, check cases specific to sub-targets.
819 
820  if (Subtarget.hasExynosCheapAsMoveHandling()) {
821  if (isExynosCheapAsMove(MI))
822  return true;
823 
824  return MI.isAsCheapAsAMove();
825  }
826 
827  // Finally, check generic cases.
828 
829  switch (Opcode) {
830  default:
831  return false;
832 
833  // add/sub on register without shift
834  case AArch64::ADDWri:
835  case AArch64::ADDXri:
836  case AArch64::SUBWri:
837  case AArch64::SUBXri:
838  return (MI.getOperand(3).getImm() == 0);
839 
840  // logical ops on immediate
841  case AArch64::ANDWri:
842  case AArch64::ANDXri:
843  case AArch64::EORWri:
844  case AArch64::EORXri:
845  case AArch64::ORRWri:
846  case AArch64::ORRXri:
847  return true;
848 
849  // logical ops on register without shift
850  case AArch64::ANDWrr:
851  case AArch64::ANDXrr:
852  case AArch64::BICWrr:
853  case AArch64::BICXrr:
854  case AArch64::EONWrr:
855  case AArch64::EONXrr:
856  case AArch64::EORWrr:
857  case AArch64::EORXrr:
858  case AArch64::ORNWrr:
859  case AArch64::ORNXrr:
860  case AArch64::ORRWrr:
861  case AArch64::ORRXrr:
862  return true;
863 
864  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
865  // ORRXri, it is as cheap as MOV
866  case AArch64::MOVi32imm:
867  return canBeExpandedToORR(MI, 32);
868  case AArch64::MOVi64imm:
869  return canBeExpandedToORR(MI, 64);
870  }
871 
872  llvm_unreachable("Unknown opcode to check as cheap as a move!");
873 }
874 
875 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
876  switch (MI.getOpcode()) {
877  default:
878  return false;
879 
880  case AArch64::ADDWrs:
881  case AArch64::ADDXrs:
882  case AArch64::ADDSWrs:
883  case AArch64::ADDSXrs: {
884  unsigned Imm = MI.getOperand(3).getImm();
885  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
886  if (ShiftVal == 0)
887  return true;
888  return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
889  }
890 
891  case AArch64::ADDWrx:
892  case AArch64::ADDXrx:
893  case AArch64::ADDXrx64:
894  case AArch64::ADDSWrx:
895  case AArch64::ADDSXrx:
896  case AArch64::ADDSXrx64: {
897  unsigned Imm = MI.getOperand(3).getImm();
898  switch (AArch64_AM::getArithExtendType(Imm)) {
899  default:
900  return false;
901  case AArch64_AM::UXTB:
902  case AArch64_AM::UXTH:
903  case AArch64_AM::UXTW:
904  case AArch64_AM::UXTX:
905  return AArch64_AM::getArithShiftValue(Imm) <= 4;
906  }
907  }
908 
909  case AArch64::SUBWrs:
910  case AArch64::SUBSWrs: {
911  unsigned Imm = MI.getOperand(3).getImm();
912  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
913  return ShiftVal == 0 ||
914  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
915  }
916 
917  case AArch64::SUBXrs:
918  case AArch64::SUBSXrs: {
919  unsigned Imm = MI.getOperand(3).getImm();
920  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
921  return ShiftVal == 0 ||
922  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
923  }
924 
925  case AArch64::SUBWrx:
926  case AArch64::SUBXrx:
927  case AArch64::SUBXrx64:
928  case AArch64::SUBSWrx:
929  case AArch64::SUBSXrx:
930  case AArch64::SUBSXrx64: {
931  unsigned Imm = MI.getOperand(3).getImm();
932  switch (AArch64_AM::getArithExtendType(Imm)) {
933  default:
934  return false;
935  case AArch64_AM::UXTB:
936  case AArch64_AM::UXTH:
937  case AArch64_AM::UXTW:
938  case AArch64_AM::UXTX:
939  return AArch64_AM::getArithShiftValue(Imm) == 0;
940  }
941  }
942 
943  case AArch64::LDRBBroW:
944  case AArch64::LDRBBroX:
945  case AArch64::LDRBroW:
946  case AArch64::LDRBroX:
947  case AArch64::LDRDroW:
948  case AArch64::LDRDroX:
949  case AArch64::LDRHHroW:
950  case AArch64::LDRHHroX:
951  case AArch64::LDRHroW:
952  case AArch64::LDRHroX:
953  case AArch64::LDRQroW:
954  case AArch64::LDRQroX:
955  case AArch64::LDRSBWroW:
956  case AArch64::LDRSBWroX:
957  case AArch64::LDRSBXroW:
958  case AArch64::LDRSBXroX:
959  case AArch64::LDRSHWroW:
960  case AArch64::LDRSHWroX:
961  case AArch64::LDRSHXroW:
962  case AArch64::LDRSHXroX:
963  case AArch64::LDRSWroW:
964  case AArch64::LDRSWroX:
965  case AArch64::LDRSroW:
966  case AArch64::LDRSroX:
967  case AArch64::LDRWroW:
968  case AArch64::LDRWroX:
969  case AArch64::LDRXroW:
970  case AArch64::LDRXroX:
971  case AArch64::PRFMroW:
972  case AArch64::PRFMroX:
973  case AArch64::STRBBroW:
974  case AArch64::STRBBroX:
975  case AArch64::STRBroW:
976  case AArch64::STRBroX:
977  case AArch64::STRDroW:
978  case AArch64::STRDroX:
979  case AArch64::STRHHroW:
980  case AArch64::STRHHroX:
981  case AArch64::STRHroW:
982  case AArch64::STRHroX:
983  case AArch64::STRQroW:
984  case AArch64::STRQroX:
985  case AArch64::STRSroW:
986  case AArch64::STRSroX:
987  case AArch64::STRWroW:
988  case AArch64::STRWroX:
989  case AArch64::STRXroW:
990  case AArch64::STRXroX: {
991  unsigned IsSigned = MI.getOperand(3).getImm();
992  return !IsSigned;
993  }
994  }
995 }
996 
997 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
998  unsigned Opc = MI.getOpcode();
999  switch (Opc) {
1000  default:
1001  return false;
1002  case AArch64::SEH_StackAlloc:
1003  case AArch64::SEH_SaveFPLR:
1004  case AArch64::SEH_SaveFPLR_X:
1005  case AArch64::SEH_SaveReg:
1006  case AArch64::SEH_SaveReg_X:
1007  case AArch64::SEH_SaveRegP:
1008  case AArch64::SEH_SaveRegP_X:
1009  case AArch64::SEH_SaveFReg:
1010  case AArch64::SEH_SaveFReg_X:
1011  case AArch64::SEH_SaveFRegP:
1012  case AArch64::SEH_SaveFRegP_X:
1013  case AArch64::SEH_SetFP:
1014  case AArch64::SEH_AddFP:
1015  case AArch64::SEH_Nop:
1016  case AArch64::SEH_PrologEnd:
1017  case AArch64::SEH_EpilogStart:
1018  case AArch64::SEH_EpilogEnd:
1019  return true;
1020  }
1021 }
1022 
1023 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1024  Register &SrcReg, Register &DstReg,
1025  unsigned &SubIdx) const {
1026  switch (MI.getOpcode()) {
1027  default:
1028  return false;
1029  case AArch64::SBFMXri: // aka sxtw
1030  case AArch64::UBFMXri: // aka uxtw
1031  // Check for the 32 -> 64 bit extension case, these instructions can do
1032  // much more.
1033  if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1034  return false;
1035  // This is a signed or unsigned 32 -> 64 bit extension.
1036  SrcReg = MI.getOperand(1).getReg();
1037  DstReg = MI.getOperand(0).getReg();
1038  SubIdx = AArch64::sub_32;
1039  return true;
1040  }
1041 }
1042 
1043 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1044  const MachineInstr &MIa, const MachineInstr &MIb) const {
1045  const TargetRegisterInfo *TRI = &getRegisterInfo();
1046  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1047  int64_t OffsetA = 0, OffsetB = 0;
1048  unsigned WidthA = 0, WidthB = 0;
1049  bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1050 
1051  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1052  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1053 
1054  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1055  MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1056  return false;
1057 
1058  // Retrieve the base, offset from the base and width. Width
1059  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1060  // the bases are identical, and the offset of a lower memory access +
1061  // the width doesn't overlap the offset of a higher memory access,
1062  // then the memory accesses are different.
1063  // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1064  // are assumed to have the same scale (vscale).
1065  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1066  WidthA, TRI) &&
1067  getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1068  WidthB, TRI)) {
1069  if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1070  OffsetAIsScalable == OffsetBIsScalable) {
1071  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1072  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1073  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1074  if (LowOffset + LowWidth <= HighOffset)
1075  return true;
1076  }
1077  }
1078  return false;
1079 }
1080 
1081 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1082  const MachineBasicBlock *MBB,
1083  const MachineFunction &MF) const {
1084  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1085  return true;
1086  switch (MI.getOpcode()) {
1087  case AArch64::HINT:
1088  // CSDB hints are scheduling barriers.
1089  if (MI.getOperand(0).getImm() == 0x14)
1090  return true;
1091  break;
1092  case AArch64::DSB:
1093  case AArch64::ISB:
1094  // DSB and ISB also are scheduling barriers.
1095  return true;
1096  default:;
1097  }
1098  if (isSEHInstruction(MI))
1099  return true;
1100  auto Next = std::next(MI.getIterator());
1101  return Next != MBB->end() && Next->isCFIInstruction();
1102 }
1103 
1104 /// analyzeCompare - For a comparison instruction, return the source registers
1105 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1106 /// Return true if the comparison instruction can be analyzed.
1107 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1108  Register &SrcReg2, int64_t &CmpMask,
1109  int64_t &CmpValue) const {
1110  // The first operand can be a frame index where we'd normally expect a
1111  // register.
1112  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1113  if (!MI.getOperand(1).isReg())
1114  return false;
1115 
1116  switch (MI.getOpcode()) {
1117  default:
1118  break;
1119  case AArch64::PTEST_PP:
1120  SrcReg = MI.getOperand(0).getReg();
1121  SrcReg2 = MI.getOperand(1).getReg();
1122  // Not sure about the mask and value for now...
1123  CmpMask = ~0;
1124  CmpValue = 0;
1125  return true;
1126  case AArch64::SUBSWrr:
1127  case AArch64::SUBSWrs:
1128  case AArch64::SUBSWrx:
1129  case AArch64::SUBSXrr:
1130  case AArch64::SUBSXrs:
1131  case AArch64::SUBSXrx:
1132  case AArch64::ADDSWrr:
1133  case AArch64::ADDSWrs:
1134  case AArch64::ADDSWrx:
1135  case AArch64::ADDSXrr:
1136  case AArch64::ADDSXrs:
1137  case AArch64::ADDSXrx:
1138  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1139  SrcReg = MI.getOperand(1).getReg();
1140  SrcReg2 = MI.getOperand(2).getReg();
1141  CmpMask = ~0;
1142  CmpValue = 0;
1143  return true;
1144  case AArch64::SUBSWri:
1145  case AArch64::ADDSWri:
1146  case AArch64::SUBSXri:
1147  case AArch64::ADDSXri:
1148  SrcReg = MI.getOperand(1).getReg();
1149  SrcReg2 = 0;
1150  CmpMask = ~0;
1151  CmpValue = MI.getOperand(2).getImm();
1152  return true;
1153  case AArch64::ANDSWri:
1154  case AArch64::ANDSXri:
1155  // ANDS does not use the same encoding scheme as the others xxxS
1156  // instructions.
1157  SrcReg = MI.getOperand(1).getReg();
1158  SrcReg2 = 0;
1159  CmpMask = ~0;
1160  CmpValue = AArch64_AM::decodeLogicalImmediate(
1161  MI.getOperand(2).getImm(),
1162  MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1163  return true;
1164  }
1165 
1166  return false;
1167 }
1168 
1169 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1170  MachineBasicBlock *MBB = Instr.getParent();
1171  assert(MBB && "Can't get MachineBasicBlock here");
1172  MachineFunction *MF = MBB->getParent();
1173  assert(MF && "Can't get MachineFunction here");
1174  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1175  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1176  MachineRegisterInfo *MRI = &MF->getRegInfo();
1177 
1178  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1179  ++OpIdx) {
1180  MachineOperand &MO = Instr.getOperand(OpIdx);
1181  const TargetRegisterClass *OpRegCstraints =
1182  Instr.getRegClassConstraint(OpIdx, TII, TRI);
1183 
1184  // If there's no constraint, there's nothing to do.
1185  if (!OpRegCstraints)
1186  continue;
1187  // If the operand is a frame index, there's nothing to do here.
1188  // A frame index operand will resolve correctly during PEI.
1189  if (MO.isFI())
1190  continue;
1191 
1192  assert(MO.isReg() &&
1193  "Operand has register constraints without being a register!");
1194 
1195  Register Reg = MO.getReg();
1196  if (Register::isPhysicalRegister(Reg)) {
1197  if (!OpRegCstraints->contains(Reg))
1198  return false;
1199  } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1200  !MRI->constrainRegClass(Reg, OpRegCstraints))
1201  return false;
1202  }
1203 
1204  return true;
1205 }
1206 
1207 /// Return the opcode that does not set flags when possible - otherwise
1208 /// return the original opcode. The caller is responsible to do the actual
1209 /// substitution and legality checking.
1210 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1211  // Don't convert all compare instructions, because for some the zero register
1212  // encoding becomes the sp register.
1213  bool MIDefinesZeroReg = false;
1214  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1215  MIDefinesZeroReg = true;
1216 
1217  switch (MI.getOpcode()) {
1218  default:
1219  return MI.getOpcode();
1220  case AArch64::ADDSWrr:
1221  return AArch64::ADDWrr;
1222  case AArch64::ADDSWri:
1223  return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1224  case AArch64::ADDSWrs:
1225  return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1226  case AArch64::ADDSWrx:
1227  return AArch64::ADDWrx;
1228  case AArch64::ADDSXrr:
1229  return AArch64::ADDXrr;
1230  case AArch64::ADDSXri:
1231  return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1232  case AArch64::ADDSXrs:
1233  return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1234  case AArch64::ADDSXrx:
1235  return AArch64::ADDXrx;
1236  case AArch64::SUBSWrr:
1237  return AArch64::SUBWrr;
1238  case AArch64::SUBSWri:
1239  return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1240  case AArch64::SUBSWrs:
1241  return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1242  case AArch64::SUBSWrx:
1243  return AArch64::SUBWrx;
1244  case AArch64::SUBSXrr:
1245  return AArch64::SUBXrr;
1246  case AArch64::SUBSXri:
1247  return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1248  case AArch64::SUBSXrs:
1249  return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1250  case AArch64::SUBSXrx:
1251  return AArch64::SUBXrx;
1252  }
1253 }
1254 
1255 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1256 
1257 /// True when condition flags are accessed (either by writing or reading)
1258 /// on the instruction trace starting at From and ending at To.
1259 ///
1260 /// Note: If From and To are from different blocks it's assumed CC are accessed
1261 /// on the path.
1262 static bool areCFlagsAccessedBetweenInstrs(
1263  MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1264  const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1265  // Early exit if To is at the beginning of the BB.
1266  if (To == To->getParent()->begin())
1267  return true;
1268 
1269  // Check whether the instructions are in the same basic block
1270  // If not, assume the condition flags might get modified somewhere.
1271  if (To->getParent() != From->getParent())
1272  return true;
1273 
1274  // From must be above To.
1275  assert(std::any_of(
1276  ++To.getReverse(), To->getParent()->rend(),
1277  [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1278 
1279  // We iterate backward starting at \p To until we hit \p From.
1280  for (const MachineInstr &Instr :
1281  instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1282  if (((AccessToCheck & AK_Write) &&
1283  Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1284  ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1285  return true;
1286  }
1287  return false;
1288 }
1289 
1290 /// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1291 /// operation which could set the flags in an identical manner
1292 bool AArch64InstrInfo::optimizePTestInstr(
1293  MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1294  const MachineRegisterInfo *MRI) const {
1295  auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1296  auto *Pred = MRI->getUniqueVRegDef(PredReg);
1297  auto NewOp = Pred->getOpcode();
1298  bool OpChanged = false;
1299 
1300  unsigned MaskOpcode = Mask->getOpcode();
1301  unsigned PredOpcode = Pred->getOpcode();
1302  bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1303  bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1304 
1305  if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
1306  // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
1307  // deactivate any lanes OTHER_INST might set.
1308  uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
1309  uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1310 
1311  // Must be an all active predicate of matching element size.
1312  if ((PredElementSize != MaskElementSize) ||
1313  (Mask->getOperand(1).getImm() != 31))
1314  return false;
1315 
1316  // Fallthough to simply remove the PTEST.
1317  } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
1318  // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1319  // instruction that sets the flags as PTEST would.
1320 
1321  // Fallthough to simply remove the PTEST.
1322  } else if (PredIsPTestLike) {
1323  // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
1324  // instructions use the same predicate.
1325  auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1326  if (Mask != PTestLikeMask)
1327  return false;
1328 
1329  // Fallthough to simply remove the PTEST.
1330  } else {
1331  switch (Pred->getOpcode()) {
1332  case AArch64::BRKB_PPzP:
1333  case AArch64::BRKPB_PPzPP: {
1334  // Op 0 is chain, 1 is the mask, 2 the previous predicate to
1335  // propagate, 3 the new predicate.
1336 
1337  // Check to see if our mask is the same as the brkpb's. If
1338  // not the resulting flag bits may be different and we
1339  // can't remove the ptest.
1340  auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1341  if (Mask != PredMask)
1342  return false;
1343 
1344  // Switch to the new opcode
1345  NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
1346  : AArch64::BRKPBS_PPzPP;
1347  OpChanged = true;
1348  break;
1349  }
1350  case AArch64::BRKN_PPzP: {
1351  auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1352  if (Mask != PredMask)
1353  return false;
1354 
1355  NewOp = AArch64::BRKNS_PPzP;
1356  OpChanged = true;
1357  break;
1358  }
1359  case AArch64::RDFFR_PPz: {
1360  // rdffr p1.b, PredMask=p0/z <--- Definition of Pred
1361  // ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use
1362  // `rdffrs p1.b, p0/z` above.
1363  auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1364  if (Mask != PredMask)
1365  return false;
1366 
1367  NewOp = AArch64::RDFFRS_PPz;
1368  OpChanged = true;
1369  break;
1370  }
1371  default:
1372  // Bail out if we don't recognize the input
1373  return false;
1374  }
1375  }
1376 
1377  const TargetRegisterInfo *TRI = &getRegisterInfo();
1378 
1379  // If another instruction between Pred and PTest accesses flags, don't remove
1380  // the ptest or update the earlier instruction to modify them.
1381  if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1382  return false;
1383 
1384  // If we pass all the checks, it's safe to remove the PTEST and use the flags
1385  // as they are prior to PTEST. Sometimes this requires the tested PTEST
1386  // operand to be replaced with an equivalent instruction that also sets the
1387  // flags.
1388  Pred->setDesc(get(NewOp));
1389  PTest->eraseFromParent();
1390  if (OpChanged) {
1391  bool succeeded = UpdateOperandRegClass(*Pred);
1392  (void)succeeded;
1393  assert(succeeded && "Operands have incompatible register classes!");
1394  Pred->addRegisterDefined(AArch64::NZCV, TRI);
1395  }
1396 
1397  // Ensure that the flags def is live.
1398  if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1399  unsigned i = 0, e = Pred->getNumOperands();
1400  for (; i != e; ++i) {
1401  MachineOperand &MO = Pred->getOperand(i);
1402  if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1403  MO.setIsDead(false);
1404  break;
1405  }
1406  }
1407  }
1408  return true;
1409 }
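// Illustrative note (not from the original source): a typical case handled by
// the PTRUE branch above. Given SVE MIR along the lines of
//
//   %mask = PTRUE_S 31                ; all-active predicate, .s elements
//   %pred = WHILELO_PWW_S %a, %b      ; while-like op, already sets NZCV
//   PTEST_PP %mask, %pred
//   Bcc ...
//
// the PTEST is redundant: the while-like instruction sets the flags exactly as
// PTEST would under an all-active mask of the same element size, so the PTEST
// is erased and the branch consumes the existing flags.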
1410 
1411 /// Try to optimize a compare instruction. A compare instruction is an
1412 /// instruction which produces AArch64::NZCV. It is a true compare
1413 /// instruction only when there are no uses of its destination
1414 /// register.
1415 ///
1416 /// The following steps are tried in order:
1417 /// 1. Convert CmpInstr into an unconditional version.
1418 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1419 /// condition code or an instruction which can be converted into such an
1420 /// instruction.
1421 /// Only comparison with zero is supported.
1422 bool AArch64InstrInfo::optimizeCompareInstr(
1423  MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1424  int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1425  assert(CmpInstr.getParent());
1426  assert(MRI);
1427 
1428  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1429  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1430  if (DeadNZCVIdx != -1) {
1431  if (CmpInstr.definesRegister(AArch64::WZR) ||
1432  CmpInstr.definesRegister(AArch64::XZR)) {
1433  CmpInstr.eraseFromParent();
1434  return true;
1435  }
1436  unsigned Opc = CmpInstr.getOpcode();
1437  unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1438  if (NewOpc == Opc)
1439  return false;
1440  const MCInstrDesc &MCID = get(NewOpc);
1441  CmpInstr.setDesc(MCID);
1442  CmpInstr.removeOperand(DeadNZCVIdx);
1443  bool succeeded = UpdateOperandRegClass(CmpInstr);
1444  (void)succeeded;
1445  assert(succeeded && "Some operands reg class are incompatible!");
1446  return true;
1447  }
1448 
1449  if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
1450  return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1451 
1452  if (SrcReg2 != 0)
1453  return false;
1454 
1455  // CmpInstr is a Compare instruction if destination register is not used.
1456  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1457  return false;
1458 
1459  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1460  return true;
1461  return (CmpValue == 0 || CmpValue == 1) &&
1462  removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1463 }
1464 
1465 /// Get opcode of S version of Instr.
1466 /// If Instr is S version its opcode is returned.
1467 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1468 /// or we are not interested in it.
1469 static unsigned sForm(MachineInstr &Instr) {
1470  switch (Instr.getOpcode()) {
1471  default:
1472  return AArch64::INSTRUCTION_LIST_END;
1473 
1474  case AArch64::ADDSWrr:
1475  case AArch64::ADDSWri:
1476  case AArch64::ADDSXrr:
1477  case AArch64::ADDSXri:
1478  case AArch64::SUBSWrr:
1479  case AArch64::SUBSWri:
1480  case AArch64::SUBSXrr:
1481  case AArch64::SUBSXri:
1482  return Instr.getOpcode();
1483 
1484  case AArch64::ADDWrr:
1485  return AArch64::ADDSWrr;
1486  case AArch64::ADDWri:
1487  return AArch64::ADDSWri;
1488  case AArch64::ADDXrr:
1489  return AArch64::ADDSXrr;
1490  case AArch64::ADDXri:
1491  return AArch64::ADDSXri;
1492  case AArch64::ADCWr:
1493  return AArch64::ADCSWr;
1494  case AArch64::ADCXr:
1495  return AArch64::ADCSXr;
1496  case AArch64::SUBWrr:
1497  return AArch64::SUBSWrr;
1498  case AArch64::SUBWri:
1499  return AArch64::SUBSWri;
1500  case AArch64::SUBXrr:
1501  return AArch64::SUBSXrr;
1502  case AArch64::SUBXri:
1503  return AArch64::SUBSXri;
1504  case AArch64::SBCWr:
1505  return AArch64::SBCSWr;
1506  case AArch64::SBCXr:
1507  return AArch64::SBCSXr;
1508  case AArch64::ANDWri:
1509  return AArch64::ANDSWri;
1510  case AArch64::ANDXri:
1511  return AArch64::ANDSXri;
1512  }
1513 }
1514 
1515 /// Check if AArch64::NZCV should be alive in successors of MBB.
1516 static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1517  for (auto *BB : MBB->successors())
1518  if (BB->isLiveIn(AArch64::NZCV))
1519  return true;
1520  return false;
1521 }
1522 
1523 /// \returns The condition code operand index for \p Instr if it is a branch
1524 /// or select and -1 otherwise.
1525 static int
1526 findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1527  switch (Instr.getOpcode()) {
1528  default:
1529  return -1;
1530 
1531  case AArch64::Bcc: {
1532  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1533  assert(Idx >= 2);
1534  return Idx - 2;
1535  }
1536 
1537  case AArch64::CSINVWr:
1538  case AArch64::CSINVXr:
1539  case AArch64::CSINCWr:
1540  case AArch64::CSINCXr:
1541  case AArch64::CSELWr:
1542  case AArch64::CSELXr:
1543  case AArch64::CSNEGWr:
1544  case AArch64::CSNEGXr:
1545  case AArch64::FCSELSrrr:
1546  case AArch64::FCSELDrrr: {
1547  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1548  assert(Idx >= 1);
1549  return Idx - 1;
1550  }
1551  }
1552 }
1553 
1554 /// Find a condition code used by the instruction.
1555 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1556 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1557 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1558  int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1559  return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1560  Instr.getOperand(CCIdx).getImm())
1561  : AArch64CC::Invalid;
1562 }
1563 
1564 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1565  assert(CC != AArch64CC::Invalid);
1566  UsedNZCV UsedFlags;
1567  switch (CC) {
1568  default:
1569  break;
1570 
1571  case AArch64CC::EQ: // Z set
1572  case AArch64CC::NE: // Z clear
1573  UsedFlags.Z = true;
1574  break;
1575 
1576  case AArch64CC::HI: // Z clear and C set
1577  case AArch64CC::LS: // Z set or C clear
1578  UsedFlags.Z = true;
1579  LLVM_FALLTHROUGH;
1580  case AArch64CC::HS: // C set
1581  case AArch64CC::LO: // C clear
1582  UsedFlags.C = true;
1583  break;
1584 
1585  case AArch64CC::MI: // N set
1586  case AArch64CC::PL: // N clear
1587  UsedFlags.N = true;
1588  break;
1589 
1590  case AArch64CC::VS: // V set
1591  case AArch64CC::VC: // V clear
1592  UsedFlags.V = true;
1593  break;
1594 
1595  case AArch64CC::GT: // Z clear, N and V the same
1596  case AArch64CC::LE: // Z set, N and V differ
1597  UsedFlags.Z = true;
1598  LLVM_FALLTHROUGH;
1599  case AArch64CC::GE: // N and V the same
1600  case AArch64CC::LT: // N and V differ
1601  UsedFlags.N = true;
1602  UsedFlags.V = true;
1603  break;
1604  }
1605  return UsedFlags;
1606 }
1607 
1608 /// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1609 /// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1610 /// \returns None otherwise.
1611 ///
1612 /// Collect instructions using those flags in \p CCUseInstrs if provided.
1613 static Optional<UsedNZCV>
1614 examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1615  const TargetRegisterInfo &TRI,
1616  SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
1617  MachineBasicBlock *CmpParent = CmpInstr.getParent();
1618  if (MI.getParent() != CmpParent)
1619  return None;
1620 
1621  if (areCFlagsAliveInSuccessors(CmpParent))
1622  return None;
1623 
1624  UsedNZCV NZCVUsedAfterCmp;
1625  for (MachineInstr &Instr : instructionsWithoutDebug(
1626  std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1627  if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1628  AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1629  if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1630  return None;
1631  NZCVUsedAfterCmp |= getUsedNZCV(CC);
1632  if (CCUseInstrs)
1633  CCUseInstrs->push_back(&Instr);
1634  }
1635  if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1636  break;
1637  }
1638  return NZCVUsedAfterCmp;
1639 }
1640 
1641 static bool isADDSRegImm(unsigned Opcode) {
1642  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1643 }
1644 
1645 static bool isSUBSRegImm(unsigned Opcode) {
1646  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1647 }
1648 
1649 /// Check if CmpInstr can be substituted by MI.
1650 ///
1651 /// CmpInstr can be substituted:
1652 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1653 /// - and, MI and CmpInstr are from the same MachineBB
1654 /// - and, condition flags are not alive in successors of the CmpInstr parent
1655 /// - and, if MI opcode is the S form there must be no defs of flags between
1656 /// MI and CmpInstr
1657 /// or if MI opcode is not the S form there must be neither defs of flags
1658 /// nor uses of flags between MI and CmpInstr.
1659 /// - and C/V flags are not used after CmpInstr
1660 static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1661  const TargetRegisterInfo &TRI) {
1662  assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1663 
1664  const unsigned CmpOpcode = CmpInstr.getOpcode();
1665  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1666  return false;
1667 
1668  Optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1669  if (!NZVCUsed || NZVCUsed->C || NZVCUsed->V)
1670  return false;
1671 
1672  AccessKind AccessToCheck = AK_Write;
1673  if (sForm(MI) != MI.getOpcode())
1674  AccessToCheck = AK_All;
1675  return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1676 }
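// Illustrative note (not from the original source): the substitution enabled
// here (performed by substituteCmpToZero() below) turns, for example,
//
//   %sum = ADDWrr %a, %b
//   $wzr = SUBSWri %sum, 0, 0      ; cmp %sum, #0
//   Bcc  ne, %bb.1
//
// into
//
//   %sum = ADDSWrr %a, %b          ; same add, but now defines NZCV
//   Bcc  ne, %bb.1
//
// which is legal because only the Z flag is consumed afterwards and nothing
// between the add and the compare touches NZCV.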
1677 
1678 /// Substitute an instruction comparing to zero with another instruction
1679 /// which produces needed condition flags.
1680 ///
1681 /// Return true on success.
1682 bool AArch64InstrInfo::substituteCmpToZero(
1683  MachineInstr &CmpInstr, unsigned SrcReg,
1684  const MachineRegisterInfo &MRI) const {
1685  // Get the unique definition of SrcReg.
1686  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1687  if (!MI)
1688  return false;
1689 
1690  const TargetRegisterInfo &TRI = getRegisterInfo();
1691 
1692  unsigned NewOpc = sForm(*MI);
1693  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1694  return false;
1695 
1696  if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1697  return false;
1698 
1699  // Update the instruction to set NZCV.
1700  MI->setDesc(get(NewOpc));
1701  CmpInstr.eraseFromParent();
1702  bool succeeded = UpdateOperandRegClass(*MI);
1703  (void)succeeded;
1704  assert(succeeded && "Some operands reg class are incompatible!");
1705  MI->addRegisterDefined(AArch64::NZCV, &TRI);
1706  return true;
1707 }
1708 
1709 /// \returns True if \p CmpInstr can be removed.
1710 ///
1711 /// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1712 /// codes used in \p CCUseInstrs must be inverted.
1713 static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1714  int CmpValue, const TargetRegisterInfo &TRI,
1715  SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1716  bool &IsInvertCC) {
1717  assert((CmpValue == 0 || CmpValue == 1) &&
1718  "Only comparisons to 0 or 1 considered for removal!");
1719 
1720  // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1721  unsigned MIOpc = MI.getOpcode();
1722  if (MIOpc == AArch64::CSINCWr) {
1723  if (MI.getOperand(1).getReg() != AArch64::WZR ||
1724  MI.getOperand(2).getReg() != AArch64::WZR)
1725  return false;
1726  } else if (MIOpc == AArch64::CSINCXr) {
1727  if (MI.getOperand(1).getReg() != AArch64::XZR ||
1728  MI.getOperand(2).getReg() != AArch64::XZR)
1729  return false;
1730  } else {
1731  return false;
1732  }
1733  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1734  if (MICC == AArch64CC::Invalid)
1735  return false;
1736 
1737  // NZCV needs to be defined
1738  if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
1739  return false;
1740 
1741  // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1742  const unsigned CmpOpcode = CmpInstr.getOpcode();
1743  bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1744  if (CmpValue && !IsSubsRegImm)
1745  return false;
1746  if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1747  return false;
1748 
1749  // MI conditions allowed: eq, ne, mi, pl
1750  UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1751  if (MIUsedNZCV.C || MIUsedNZCV.V)
1752  return false;
1753 
1754  Optional<UsedNZCV> NZCVUsedAfterCmp =
1755  examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1756  // Condition flags are not used in CmpInstr basic block successors and only
1757  // Z or N flags are allowed to be used after CmpInstr within its basic block
1758  if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1759  return false;
1760  // Z or N flag used after CmpInstr must correspond to the flag used in MI
1761  if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1762  (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1763  return false;
1764  // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
1765  if (MIUsedNZCV.N && !CmpValue)
1766  return false;
1767 
1768  // There must be no defs of flags between MI and CmpInstr
1769  if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1770  return false;
1771 
1772  // Condition code is inverted in the following cases:
1773  // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1774  // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1775  IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1776  (!CmpValue && MICC == AArch64CC::NE);
1777  return true;
1778 }
1779 
1780 /// Remove comparison in csinc-cmp sequence
1781 ///
1782 /// Examples:
1783 /// 1. \code
1784 /// csinc w9, wzr, wzr, ne
1785 /// cmp w9, #0
1786 /// b.eq
1787 /// \endcode
1788 /// to
1789 /// \code
1790 /// csinc w9, wzr, wzr, ne
1791 /// b.ne
1792 /// \endcode
1793 ///
1794 /// 2. \code
1795 /// csinc x2, xzr, xzr, mi
1796 /// cmp x2, #1
1797 /// b.pl
1798 /// \endcode
1799 /// to
1800 /// \code
1801 /// csinc x2, xzr, xzr, mi
1802 /// b.pl
1803 /// \endcode
1804 ///
1805 /// \param CmpInstr comparison instruction
1806 /// \return True when comparison removed
1807 bool AArch64InstrInfo::removeCmpToZeroOrOne(
1808  MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1809  const MachineRegisterInfo &MRI) const {
1810  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1811  if (!MI)
1812  return false;
1813  const TargetRegisterInfo &TRI = getRegisterInfo();
1814  SmallVector<MachineInstr *, 4> CCUseInstrs;
1815  bool IsInvertCC = false;
1816  if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1817  IsInvertCC))
1818  return false;
1819  // Make transformation
1820  CmpInstr.eraseFromParent();
1821  if (IsInvertCC) {
1822  // Invert condition codes in CmpInstr CC users
1823  for (MachineInstr *CCUseInstr : CCUseInstrs) {
1824  int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1825  assert(Idx >= 0 && "Unexpected instruction using CC.");
1826  MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1827  AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1828  static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1829  CCOperand.setImm(CCUse);
1830  }
1831  }
1832  return true;
1833 }
1834 
1835 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1836  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1837  MI.getOpcode() != AArch64::CATCHRET)
1838  return false;
1839 
1840  MachineBasicBlock &MBB = *MI.getParent();
1841  auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1842  auto TRI = Subtarget.getRegisterInfo();
1843  DebugLoc DL = MI.getDebugLoc();
1844 
1845  if (MI.getOpcode() == AArch64::CATCHRET) {
1846  // Skip to the first instruction before the epilog.
1847  const TargetInstrInfo *TII =
1848  MBB.getParent()->getSubtarget().getInstrInfo();
1849  MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1850  MachineBasicBlock::iterator MBBI = MachineBasicBlock::iterator(MI);
1851  MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1852  while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1853  FirstEpilogSEH != MBB.begin())
1854  FirstEpilogSEH = std::prev(FirstEpilogSEH);
1855  if (FirstEpilogSEH != MBB.begin())
1856  FirstEpilogSEH = std::next(FirstEpilogSEH);
1857  BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1858  .addReg(AArch64::X0, RegState::Define)
1859  .addMBB(TargetMBB);
1860  BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1861  .addReg(AArch64::X0, RegState::Define)
1862  .addReg(AArch64::X0)
1863  .addMBB(TargetMBB)
1864  .addImm(0);
1865  return true;
1866  }
1867 
1868  Register Reg = MI.getOperand(0).getReg();
1869  Module &M = *MBB.getParent()->getFunction().getParent();
1870  if (M.getStackProtectorGuard() == "sysreg") {
1871  const AArch64SysReg::SysReg *SrcReg =
1872  AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
1873  if (!SrcReg)
1874  report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
1875 
1876  // mrs xN, sysreg
1879  .addImm(SrcReg->Encoding);
1880  int Offset = M.getStackProtectorGuardOffset();
1881  if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
1882  // ldr xN, [xN, #offset]
1883  BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1884  .addDef(Reg)
1886  .addImm(Offset / 8);
1887  } else if (Offset >= -256 && Offset <= 255) {
1888  // ldur xN, [xN, #offset]
1889  BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
1890  .addDef(Reg)
1892  .addImm(Offset);
1893  } else if (Offset >= -4095 && Offset <= 4095) {
1894  if (Offset > 0) {
1895  // add xN, xN, #offset
1896  BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
1897  .addDef(Reg)
1899  .addImm(Offset)
1900  .addImm(0);
1901  } else {
1902  // sub xN, xN, #offset
1903  BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
1904  .addDef(Reg)
1906  .addImm(-Offset)
1907  .addImm(0);
1908  }
1909  // ldr xN, [xN]
1910  BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1911  .addDef(Reg)
1913  .addImm(0);
1914  } else {
1915  // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
1916  // than 32760.
1917  // It might be nice to use AArch64::MOVi32imm here, which would get
1918  // expanded in PreSched2 after PostRA, but our lone scratch Reg already
1919  // contains the MRS result. findScratchNonCalleeSaveRegister() in
1920  // AArch64FrameLowering might help us find such a scratch register
1921  // though. If we failed to find a scratch register, we could emit a
1922  // stream of add instructions to build up the immediate. Or, we could try
1923  // to insert a AArch64::MOVi32imm before register allocation so that we
1924  // didn't need to scavenge for a scratch register.
1925  report_fatal_error("Unable to encode Stack Protector Guard Offset");
1926  }
1927  MBB.erase(MI);
1928  return true;
1929  }
1930 
1931  const GlobalValue *GV =
1932  cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1933  const TargetMachine &TM = MBB.getParent()->getTarget();
1934  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1935  const unsigned char MO_NC = AArch64II::MO_NC;
1936 
1937  if ((OpFlags & AArch64II::MO_GOT) != 0) {
1939  .addGlobalAddress(GV, 0, OpFlags);
1940  if (Subtarget.isTargetILP32()) {
1941  unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
1942  BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
1943  .addDef(Reg32, RegState::Dead)
1945  .addImm(0)
1946  .addMemOperand(*MI.memoperands_begin())
1948  } else {
1949  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1951  .addImm(0)
1952  .addMemOperand(*MI.memoperands_begin());
1953  }
1954  } else if (TM.getCodeModel() == CodeModel::Large) {
1955  assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
1956  BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1958  .addImm(0);
1959  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1962  .addImm(16);
1963  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1966  .addImm(32);
1967  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1970  .addImm(48);
1971  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1973  .addImm(0)
1974  .addMemOperand(*MI.memoperands_begin());
1975  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1977  .addGlobalAddress(GV, 0, OpFlags);
1978  } else {
1980  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1981  unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1982  if (Subtarget.isTargetILP32()) {
1983  unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
1984  BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
1985  .addDef(Reg32, RegState::Dead)
1987  .addGlobalAddress(GV, 0, LoFlags)
1988  .addMemOperand(*MI.memoperands_begin())
1990  } else {
1991  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1993  .addGlobalAddress(GV, 0, LoFlags)
1994  .addMemOperand(*MI.memoperands_begin());
1995  }
1996  }
1997 
1998  MBB.erase(MI);
1999 
2000  return true;
2001 }
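// Illustrative sketch of the addressing-form selection used by the "sysreg"
// stack-guard branch above (a standalone helper for exposition only; the
// names are not part of this file):
// \code
//   enum class GuardLoadForm { LdrUi, Ldur, AddSubThenLdr, Unencodable };
//   GuardLoadForm classifyGuardOffset(int Offset) {
//     if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0)
//       return GuardLoadForm::LdrUi;         // ldr xN, [xN, #Offset]
//     if (Offset >= -256 && Offset <= 255)
//       return GuardLoadForm::Ldur;          // ldur xN, [xN, #Offset]
//     if (Offset >= -4095 && Offset <= 4095)
//       return GuardLoadForm::AddSubThenLdr; // add/sub xN, xN, #|Offset|; ldr xN, [xN]
//     return GuardLoadForm::Unencodable;     // triggers the fatal error above
//   }
// \endcode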
2002 
2003 // Return true if this instruction simply sets its single destination register
2004 // to zero. This is equivalent to a register rename of the zero-register.
2006  switch (MI.getOpcode()) {
2007  default:
2008  break;
2009  case AArch64::MOVZWi:
2010  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2011  if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2012  assert(MI.getDesc().getNumOperands() == 3 &&
2013  MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2014  return true;
2015  }
2016  break;
2017  case AArch64::ANDWri: // and Rd, Rzr, #imm
2018  return MI.getOperand(1).getReg() == AArch64::WZR;
2019  case AArch64::ANDXri:
2020  return MI.getOperand(1).getReg() == AArch64::XZR;
2021  case TargetOpcode::COPY:
2022  return MI.getOperand(1).getReg() == AArch64::WZR;
2023  }
2024  return false;
2025 }
2026 
2027 // Return true if this instruction simply renames a general register without
2028 // modifying bits.
2030  switch (MI.getOpcode()) {
2031  default:
2032  break;
2033  case TargetOpcode::COPY: {
2034  // GPR32 copies will be lowered to ORRXrs
2035  Register DstReg = MI.getOperand(0).getReg();
2036  return (AArch64::GPR32RegClass.contains(DstReg) ||
2037  AArch64::GPR64RegClass.contains(DstReg));
2038  }
2039  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2040  if (MI.getOperand(1).getReg() == AArch64::XZR) {
2041  assert(MI.getDesc().getNumOperands() == 4 &&
2042  MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2043  return true;
2044  }
2045  break;
2046  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2047  if (MI.getOperand(2).getImm() == 0) {
2048  assert(MI.getDesc().getNumOperands() == 4 &&
2049  MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2050  return true;
2051  }
2052  break;
2053  }
2054  return false;
2055 }
2056 
2057 // Return true if this instruction simply renames an FP register without
2058 // modifying bits.
2060  switch (MI.getOpcode()) {
2061  default:
2062  break;
2063  case TargetOpcode::COPY: {
2064  Register DstReg = MI.getOperand(0).getReg();
2065  return AArch64::FPR128RegClass.contains(DstReg);
2066  }
2067  case AArch64::ORRv16i8:
2068  if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2069  assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2070  "invalid ORRv16i8 operands");
2071  return true;
2072  }
2073  break;
2074  }
2075  return false;
2076 }
2077 
2079  int &FrameIndex) const {
2080  switch (MI.getOpcode()) {
2081  default:
2082  break;
2083  case AArch64::LDRWui:
2084  case AArch64::LDRXui:
2085  case AArch64::LDRBui:
2086  case AArch64::LDRHui:
2087  case AArch64::LDRSui:
2088  case AArch64::LDRDui:
2089  case AArch64::LDRQui:
2090  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2091  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2092  FrameIndex = MI.getOperand(1).getIndex();
2093  return MI.getOperand(0).getReg();
2094  }
2095  break;
2096  }
2097 
2098  return 0;
2099 }
2100 
2102  int &FrameIndex) const {
2103  switch (MI.getOpcode()) {
2104  default:
2105  break;
2106  case AArch64::STRWui:
2107  case AArch64::STRXui:
2108  case AArch64::STRBui:
2109  case AArch64::STRHui:
2110  case AArch64::STRSui:
2111  case AArch64::STRDui:
2112  case AArch64::STRQui:
2113  case AArch64::LDR_PXI:
2114  case AArch64::STR_PXI:
2115  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2116  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2117  FrameIndex = MI.getOperand(1).getIndex();
2118  return MI.getOperand(0).getReg();
2119  }
2120  break;
2121  }
2122  return 0;
2123 }
2124 
2125 /// Check all MachineMemOperands for a hint to suppress pairing.
2127  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2128  return MMO->getFlags() & MOSuppressPair;
2129  });
2130 }
2131 
2132 /// Set a flag on the first MachineMemOperand to suppress pairing.
2134  if (MI.memoperands_empty())
2135  return;
2136  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2137 }
2138 
2139 /// Check all MachineMemOperands for a hint that the load/store is strided.
2141  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2142  return MMO->getFlags() & MOStridedAccess;
2143  });
2144 }
2145 
2147  switch (Opc) {
2148  default:
2149  return false;
2150  case AArch64::STURSi:
2151  case AArch64::STRSpre:
2152  case AArch64::STURDi:
2153  case AArch64::STRDpre:
2154  case AArch64::STURQi:
2155  case AArch64::STRQpre:
2156  case AArch64::STURBBi:
2157  case AArch64::STURHHi:
2158  case AArch64::STURWi:
2159  case AArch64::STRWpre:
2160  case AArch64::STURXi:
2161  case AArch64::STRXpre:
2162  case AArch64::LDURSi:
2163  case AArch64::LDRSpre:
2164  case AArch64::LDURDi:
2165  case AArch64::LDRDpre:
2166  case AArch64::LDURQi:
2167  case AArch64::LDRQpre:
2168  case AArch64::LDURWi:
2169  case AArch64::LDRWpre:
2170  case AArch64::LDURXi:
2171  case AArch64::LDRXpre:
2172  case AArch64::LDURSWi:
2173  case AArch64::LDURHHi:
2174  case AArch64::LDURBBi:
2175  case AArch64::LDURSBWi:
2176  case AArch64::LDURSHWi:
2177  return true;
2178  }
2179 }
2180 
2182  switch (Opc) {
2183  default: return {};
2184  case AArch64::PRFMui: return AArch64::PRFUMi;
2185  case AArch64::LDRXui: return AArch64::LDURXi;
2186  case AArch64::LDRWui: return AArch64::LDURWi;
2187  case AArch64::LDRBui: return AArch64::LDURBi;
2188  case AArch64::LDRHui: return AArch64::LDURHi;
2189  case AArch64::LDRSui: return AArch64::LDURSi;
2190  case AArch64::LDRDui: return AArch64::LDURDi;
2191  case AArch64::LDRQui: return AArch64::LDURQi;
2192  case AArch64::LDRBBui: return AArch64::LDURBBi;
2193  case AArch64::LDRHHui: return AArch64::LDURHHi;
2194  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2195  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2196  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2197  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2198  case AArch64::LDRSWui: return AArch64::LDURSWi;
2199  case AArch64::STRXui: return AArch64::STURXi;
2200  case AArch64::STRWui: return AArch64::STURWi;
2201  case AArch64::STRBui: return AArch64::STURBi;
2202  case AArch64::STRHui: return AArch64::STURHi;
2203  case AArch64::STRSui: return AArch64::STURSi;
2204  case AArch64::STRDui: return AArch64::STURDi;
2205  case AArch64::STRQui: return AArch64::STURQi;
2206  case AArch64::STRBBui: return AArch64::STURBBi;
2207  case AArch64::STRHHui: return AArch64::STURHHi;
2208  }
2209 }
2210 
2212  switch (Opc) {
2213  default:
2214  return 2;
2215  case AArch64::LDPXi:
2216  case AArch64::LDPDi:
2217  case AArch64::STPXi:
2218  case AArch64::STPDi:
2219  case AArch64::LDNPXi:
2220  case AArch64::LDNPDi:
2221  case AArch64::STNPXi:
2222  case AArch64::STNPDi:
2223  case AArch64::LDPQi:
2224  case AArch64::STPQi:
2225  case AArch64::LDNPQi:
2226  case AArch64::STNPQi:
2227  case AArch64::LDPWi:
2228  case AArch64::LDPSi:
2229  case AArch64::STPWi:
2230  case AArch64::STPSi:
2231  case AArch64::LDNPWi:
2232  case AArch64::LDNPSi:
2233  case AArch64::STNPWi:
2234  case AArch64::STNPSi:
2235  case AArch64::LDG:
2236  case AArch64::STGPi:
2237 
2238  case AArch64::LD1B_IMM:
2239  case AArch64::LD1B_H_IMM:
2240  case AArch64::LD1B_S_IMM:
2241  case AArch64::LD1B_D_IMM:
2242  case AArch64::LD1SB_H_IMM:
2243  case AArch64::LD1SB_S_IMM:
2244  case AArch64::LD1SB_D_IMM:
2245  case AArch64::LD1H_IMM:
2246  case AArch64::LD1H_S_IMM:
2247  case AArch64::LD1H_D_IMM:
2248  case AArch64::LD1SH_S_IMM:
2249  case AArch64::LD1SH_D_IMM:
2250  case AArch64::LD1W_IMM:
2251  case AArch64::LD1W_D_IMM:
2252  case AArch64::LD1SW_D_IMM:
2253  case AArch64::LD1D_IMM:
2254 
2255  case AArch64::LD2B_IMM:
2256  case AArch64::LD2H_IMM:
2257  case AArch64::LD2W_IMM:
2258  case AArch64::LD2D_IMM:
2259  case AArch64::LD3B_IMM:
2260  case AArch64::LD3H_IMM:
2261  case AArch64::LD3W_IMM:
2262  case AArch64::LD3D_IMM:
2263  case AArch64::LD4B_IMM:
2264  case AArch64::LD4H_IMM:
2265  case AArch64::LD4W_IMM:
2266  case AArch64::LD4D_IMM:
2267 
2268  case AArch64::ST1B_IMM:
2269  case AArch64::ST1B_H_IMM:
2270  case AArch64::ST1B_S_IMM:
2271  case AArch64::ST1B_D_IMM:
2272  case AArch64::ST1H_IMM:
2273  case AArch64::ST1H_S_IMM:
2274  case AArch64::ST1H_D_IMM:
2275  case AArch64::ST1W_IMM:
2276  case AArch64::ST1W_D_IMM:
2277  case AArch64::ST1D_IMM:
2278 
2279  case AArch64::ST2B_IMM:
2280  case AArch64::ST2H_IMM:
2281  case AArch64::ST2W_IMM:
2282  case AArch64::ST2D_IMM:
2283  case AArch64::ST3B_IMM:
2284  case AArch64::ST3H_IMM:
2285  case AArch64::ST3W_IMM:
2286  case AArch64::ST3D_IMM:
2287  case AArch64::ST4B_IMM:
2288  case AArch64::ST4H_IMM:
2289  case AArch64::ST4W_IMM:
2290  case AArch64::ST4D_IMM:
2291 
2292  case AArch64::LD1RB_IMM:
2293  case AArch64::LD1RB_H_IMM:
2294  case AArch64::LD1RB_S_IMM:
2295  case AArch64::LD1RB_D_IMM:
2296  case AArch64::LD1RSB_H_IMM:
2297  case AArch64::LD1RSB_S_IMM:
2298  case AArch64::LD1RSB_D_IMM:
2299  case AArch64::LD1RH_IMM:
2300  case AArch64::LD1RH_S_IMM:
2301  case AArch64::LD1RH_D_IMM:
2302  case AArch64::LD1RSH_S_IMM:
2303  case AArch64::LD1RSH_D_IMM:
2304  case AArch64::LD1RW_IMM:
2305  case AArch64::LD1RW_D_IMM:
2306  case AArch64::LD1RSW_IMM:
2307  case AArch64::LD1RD_IMM:
2308 
2309  case AArch64::LDNT1B_ZRI:
2310  case AArch64::LDNT1H_ZRI:
2311  case AArch64::LDNT1W_ZRI:
2312  case AArch64::LDNT1D_ZRI:
2313  case AArch64::STNT1B_ZRI:
2314  case AArch64::STNT1H_ZRI:
2315  case AArch64::STNT1W_ZRI:
2316  case AArch64::STNT1D_ZRI:
2317 
2318  case AArch64::LDNF1B_IMM:
2319  case AArch64::LDNF1B_H_IMM:
2320  case AArch64::LDNF1B_S_IMM:
2321  case AArch64::LDNF1B_D_IMM:
2322  case AArch64::LDNF1SB_H_IMM:
2323  case AArch64::LDNF1SB_S_IMM:
2324  case AArch64::LDNF1SB_D_IMM:
2325  case AArch64::LDNF1H_IMM:
2326  case AArch64::LDNF1H_S_IMM:
2327  case AArch64::LDNF1H_D_IMM:
2328  case AArch64::LDNF1SH_S_IMM:
2329  case AArch64::LDNF1SH_D_IMM:
2330  case AArch64::LDNF1W_IMM:
2331  case AArch64::LDNF1W_D_IMM:
2332  case AArch64::LDNF1SW_D_IMM:
2333  case AArch64::LDNF1D_IMM:
2334  return 3;
2335  case AArch64::ADDG:
2336  case AArch64::STGOffset:
2337  case AArch64::LDR_PXI:
2338  case AArch64::STR_PXI:
2339  return 2;
2340  }
2341 }
2342 
2344  switch (MI.getOpcode()) {
2345  default:
2346  return false;
2347  // Scaled instructions.
2348  case AArch64::STRSui:
2349  case AArch64::STRDui:
2350  case AArch64::STRQui:
2351  case AArch64::STRXui:
2352  case AArch64::STRWui:
2353  case AArch64::LDRSui:
2354  case AArch64::LDRDui:
2355  case AArch64::LDRQui:
2356  case AArch64::LDRXui:
2357  case AArch64::LDRWui:
2358  case AArch64::LDRSWui:
2359  // Unscaled instructions.
2360  case AArch64::STURSi:
2361  case AArch64::STRSpre:
2362  case AArch64::STURDi:
2363  case AArch64::STRDpre:
2364  case AArch64::STURQi:
2365  case AArch64::STRQpre:
2366  case AArch64::STURWi:
2367  case AArch64::STRWpre:
2368  case AArch64::STURXi:
2369  case AArch64::STRXpre:
2370  case AArch64::LDURSi:
2371  case AArch64::LDRSpre:
2372  case AArch64::LDURDi:
2373  case AArch64::LDRDpre:
2374  case AArch64::LDURQi:
2375  case AArch64::LDRQpre:
2376  case AArch64::LDURWi:
2377  case AArch64::LDRWpre:
2378  case AArch64::LDURXi:
2379  case AArch64::LDRXpre:
2380  case AArch64::LDURSWi:
2381  return true;
2382  }
2383 }
2384 
2385 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
2386  bool &Is64Bit) {
2387  switch (Opc) {
2388  default:
2389  llvm_unreachable("Opcode has no flag setting equivalent!");
2390  // 32-bit cases:
2391  case AArch64::ADDWri:
2392  Is64Bit = false;
2393  return AArch64::ADDSWri;
2394  case AArch64::ADDWrr:
2395  Is64Bit = false;
2396  return AArch64::ADDSWrr;
2397  case AArch64::ADDWrs:
2398  Is64Bit = false;
2399  return AArch64::ADDSWrs;
2400  case AArch64::ADDWrx:
2401  Is64Bit = false;
2402  return AArch64::ADDSWrx;
2403  case AArch64::ANDWri:
2404  Is64Bit = false;
2405  return AArch64::ANDSWri;
2406  case AArch64::ANDWrr:
2407  Is64Bit = false;
2408  return AArch64::ANDSWrr;
2409  case AArch64::ANDWrs:
2410  Is64Bit = false;
2411  return AArch64::ANDSWrs;
2412  case AArch64::BICWrr:
2413  Is64Bit = false;
2414  return AArch64::BICSWrr;
2415  case AArch64::BICWrs:
2416  Is64Bit = false;
2417  return AArch64::BICSWrs;
2418  case AArch64::SUBWri:
2419  Is64Bit = false;
2420  return AArch64::SUBSWri;
2421  case AArch64::SUBWrr:
2422  Is64Bit = false;
2423  return AArch64::SUBSWrr;
2424  case AArch64::SUBWrs:
2425  Is64Bit = false;
2426  return AArch64::SUBSWrs;
2427  case AArch64::SUBWrx:
2428  Is64Bit = false;
2429  return AArch64::SUBSWrx;
2430  // 64-bit cases:
2431  case AArch64::ADDXri:
2432  Is64Bit = true;
2433  return AArch64::ADDSXri;
2434  case AArch64::ADDXrr:
2435  Is64Bit = true;
2436  return AArch64::ADDSXrr;
2437  case AArch64::ADDXrs:
2438  Is64Bit = true;
2439  return AArch64::ADDSXrs;
2440  case AArch64::ADDXrx:
2441  Is64Bit = true;
2442  return AArch64::ADDSXrx;
2443  case AArch64::ANDXri:
2444  Is64Bit = true;
2445  return AArch64::ANDSXri;
2446  case AArch64::ANDXrr:
2447  Is64Bit = true;
2448  return AArch64::ANDSXrr;
2449  case AArch64::ANDXrs:
2450  Is64Bit = true;
2451  return AArch64::ANDSXrs;
2452  case AArch64::BICXrr:
2453  Is64Bit = true;
2454  return AArch64::BICSXrr;
2455  case AArch64::BICXrs:
2456  Is64Bit = true;
2457  return AArch64::BICSXrs;
2458  case AArch64::SUBXri:
2459  Is64Bit = true;
2460  return AArch64::SUBSXri;
2461  case AArch64::SUBXrr:
2462  Is64Bit = true;
2463  return AArch64::SUBSXrr;
2464  case AArch64::SUBXrs:
2465  Is64Bit = true;
2466  return AArch64::SUBSXrs;
2467  case AArch64::SUBXrx:
2468  Is64Bit = true;
2469  return AArch64::SUBSXrx;
2470  }
2471 }
2472 
2473 // Is this a candidate for ld/st merging or pairing? For example, we don't
2474 // touch volatiles or load/stores that have a hint to avoid pair formation.
2475 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2476 
2477  bool IsPreLdSt = isPreLdSt(MI);
2478 
2479  // If this is a volatile load/store, don't mess with it.
2480  if (MI.hasOrderedMemoryRef())
2481  return false;
2482 
2483  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2484  // For Pre-inc LD/ST, the operand is shifted by one.
2485  assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2486  MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2487  "Expected a reg or frame index operand.");
2488 
2489  // For Pre-indexed addressing quadword instructions, the third operand is the
2490  // immediate value.
2491  bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2492 
2493  if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2494  return false;
2495 
2496  // Can't merge/pair if the instruction modifies the base register.
2497  // e.g., ldr x0, [x0]
2498  // This case will never occur with an FI base.
2499  // However, if the instruction is an LDR/STR<S,D,Q,W,X>pre, it can be merged.
2500  // For example:
2501  // ldr q0, [x11, #32]!
2502  // ldr q1, [x11, #16]
2503  // to
2504  // ldp q0, q1, [x11, #32]!
2505  if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2506  Register BaseReg = MI.getOperand(1).getReg();
2508  if (MI.modifiesRegister(BaseReg, TRI))
2509  return false;
2510  }
2511 
2512  // Check if this load/store has a hint to avoid pair formation.
2513  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2514  if (isLdStPairSuppressed(MI))
2515  return false;
2516 
2517  // Do not pair any callee-save store/reload instructions in the
2518  // prologue/epilogue if the CFI information encoded the operations as separate
2519  // instructions, as that will cause the size of the actual prologue to differ
2520  // from the prologue size recorded in the Windows CFI.
2521  const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2522  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2523  MI.getMF()->getFunction().needsUnwindTableEntry();
2524  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2525  MI.getFlag(MachineInstr::FrameDestroy)))
2526  return false;
2527 
2528  // On some CPUs quad load/store pairs are slower than two single load/stores.
2529  if (Subtarget.isPaired128Slow()) {
2530  switch (MI.getOpcode()) {
2531  default:
2532  break;
2533  case AArch64::LDURQi:
2534  case AArch64::STURQi:
2535  case AArch64::LDRQui:
2536  case AArch64::STRQui:
2537  return false;
2538  }
2539  }
2540 
2541  return true;
2542 }
2543 
2546  int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
2547  const TargetRegisterInfo *TRI) const {
2548  if (!LdSt.mayLoadOrStore())
2549  return false;
2550 
2551  const MachineOperand *BaseOp;
2552  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2553  Width, TRI))
2554  return false;
2555  BaseOps.push_back(BaseOp);
2556  return true;
2557 }
2558 
2561  const TargetRegisterInfo *TRI) const {
2562  const MachineOperand *Base; // Filled with the base operand of MI.
2563  int64_t Offset; // Filled with the offset of MI.
2564  bool OffsetIsScalable;
2565  if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2566  return None;
2567 
2568  if (!Base->isReg())
2569  return None;
2570  ExtAddrMode AM;
2571  AM.BaseReg = Base->getReg();
2572  AM.Displacement = Offset;
2573  AM.ScaledReg = 0;
2574  AM.Scale = 0;
2575  return AM;
2576 }
2577 
2579  const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
2580  bool &OffsetIsScalable, unsigned &Width,
2581  const TargetRegisterInfo *TRI) const {
2582  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2583  // Handle only loads/stores with base register followed by immediate offset.
2584  if (LdSt.getNumExplicitOperands() == 3) {
2585  // Non-paired instruction (e.g., ldr x1, [x0, #8]).
2586  if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2587  !LdSt.getOperand(2).isImm())
2588  return false;
2589  } else if (LdSt.getNumExplicitOperands() == 4) {
2590  // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2591  if (!LdSt.getOperand(1).isReg() ||
2592  (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
2593  !LdSt.getOperand(3).isImm())
2594  return false;
2595  } else
2596  return false;
2597 
2598  // Get the scaling factor for the instruction and set the width for the
2599  // instruction.
2600  TypeSize Scale(0U, false);
2601  int64_t Dummy1, Dummy2;
2602 
2603  // If this returns false, then it's an instruction we don't want to handle.
2604  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2605  return false;
2606 
2607  // Compute the offset. Offset is calculated as the immediate operand
2608  // multiplied by the scaling factor. Unscaled instructions have scaling factor
2609  // set to 1.
2610  if (LdSt.getNumExplicitOperands() == 3) {
2611  BaseOp = &LdSt.getOperand(1);
2612  Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
2613  } else {
2614  assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2615  BaseOp = &LdSt.getOperand(2);
2616  Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
2617  }
2618  OffsetIsScalable = Scale.isScalable();
2619 
2620  if (!BaseOp->isReg() && !BaseOp->isFI())
2621  return false;
2622 
2623  return true;
2624 }
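// The byte offset produced above is just the encoded immediate times the
// per-opcode scale (1 for the unscaled LDUR/STUR forms). A hedged standalone
// example of that arithmetic:
// \code
//   // 'ldr x1, [x0, #16]' is LDRXui with Imm = 2 and Scale = 8, so the byte
//   // offset is 2 * 8 = 16; 'ldur x1, [x0, #16]' has Scale = 1 and Imm = 16,
//   // yielding the same byte offset.
//   int64_t byteOffset(int64_t Imm, int64_t Scale) { return Imm * Scale; }
// \endcode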
2625 
2628  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2629  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2630  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2631  return OfsOp;
2632 }
2633 
2634 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
2635  unsigned &Width, int64_t &MinOffset,
2636  int64_t &MaxOffset) {
2637  const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
2638  switch (Opcode) {
2639  // Not a memory operation or something we want to handle.
2640  default:
2641  Scale = TypeSize::Fixed(0);
2642  Width = 0;
2643  MinOffset = MaxOffset = 0;
2644  return false;
2645  case AArch64::STRWpost:
2646  case AArch64::LDRWpost:
2647  Width = 32;
2648  Scale = TypeSize::Fixed(4);
2649  MinOffset = -256;
2650  MaxOffset = 255;
2651  break;
2652  case AArch64::LDURQi:
2653  case AArch64::STURQi:
2654  Width = 16;
2655  Scale = TypeSize::Fixed(1);
2656  MinOffset = -256;
2657  MaxOffset = 255;
2658  break;
2659  case AArch64::PRFUMi:
2660  case AArch64::LDURXi:
2661  case AArch64::LDURDi:
2662  case AArch64::STURXi:
2663  case AArch64::STURDi:
2664  Width = 8;
2665  Scale = TypeSize::Fixed(1);
2666  MinOffset = -256;
2667  MaxOffset = 255;
2668  break;
2669  case AArch64::LDURWi:
2670  case AArch64::LDURSi:
2671  case AArch64::LDURSWi:
2672  case AArch64::STURWi:
2673  case AArch64::STURSi:
2674  Width = 4;
2675  Scale = TypeSize::Fixed(1);
2676  MinOffset = -256;
2677  MaxOffset = 255;
2678  break;
2679  case AArch64::LDURHi:
2680  case AArch64::LDURHHi:
2681  case AArch64::LDURSHXi:
2682  case AArch64::LDURSHWi:
2683  case AArch64::STURHi:
2684  case AArch64::STURHHi:
2685  Width = 2;
2686  Scale = TypeSize::Fixed(1);
2687  MinOffset = -256;
2688  MaxOffset = 255;
2689  break;
2690  case AArch64::LDURBi:
2691  case AArch64::LDURBBi:
2692  case AArch64::LDURSBXi:
2693  case AArch64::LDURSBWi:
2694  case AArch64::STURBi:
2695  case AArch64::STURBBi:
2696  Width = 1;
2697  Scale = TypeSize::Fixed(1);
2698  MinOffset = -256;
2699  MaxOffset = 255;
2700  break;
2701  case AArch64::LDPQi:
2702  case AArch64::LDNPQi:
2703  case AArch64::STPQi:
2704  case AArch64::STNPQi:
2705  Scale = TypeSize::Fixed(16);
2706  Width = 32;
2707  MinOffset = -64;
2708  MaxOffset = 63;
2709  break;
2710  case AArch64::LDRQui:
2711  case AArch64::STRQui:
2712  Scale = TypeSize::Fixed(16);
2713  Width = 16;
2714  MinOffset = 0;
2715  MaxOffset = 4095;
2716  break;
2717  case AArch64::LDPXi:
2718  case AArch64::LDPDi:
2719  case AArch64::LDNPXi:
2720  case AArch64::LDNPDi:
2721  case AArch64::STPXi:
2722  case AArch64::STPDi:
2723  case AArch64::STNPXi:
2724  case AArch64::STNPDi:
2725  Scale = TypeSize::Fixed(8);
2726  Width = 16;
2727  MinOffset = -64;
2728  MaxOffset = 63;
2729  break;
2730  case AArch64::PRFMui:
2731  case AArch64::LDRXui:
2732  case AArch64::LDRDui:
2733  case AArch64::STRXui:
2734  case AArch64::STRDui:
2735  Scale = TypeSize::Fixed(8);
2736  Width = 8;
2737  MinOffset = 0;
2738  MaxOffset = 4095;
2739  break;
2740  case AArch64::StoreSwiftAsyncContext:
2741  // Store is an STRXui, but there might be an ADDXri in the expansion too.
2742  Scale = TypeSize::Fixed(1);
2743  Width = 8;
2744  MinOffset = 0;
2745  MaxOffset = 4095;
2746  break;
2747  case AArch64::LDPWi:
2748  case AArch64::LDPSi:
2749  case AArch64::LDNPWi:
2750  case AArch64::LDNPSi:
2751  case AArch64::STPWi:
2752  case AArch64::STPSi:
2753  case AArch64::STNPWi:
2754  case AArch64::STNPSi:
2755  Scale = TypeSize::Fixed(4);
2756  Width = 8;
2757  MinOffset = -64;
2758  MaxOffset = 63;
2759  break;
2760  case AArch64::LDRWui:
2761  case AArch64::LDRSui:
2762  case AArch64::LDRSWui:
2763  case AArch64::STRWui:
2764  case AArch64::STRSui:
2765  Scale = TypeSize::Fixed(4);
2766  Width = 4;
2767  MinOffset = 0;
2768  MaxOffset = 4095;
2769  break;
2770  case AArch64::LDRHui:
2771  case AArch64::LDRHHui:
2772  case AArch64::LDRSHWui:
2773  case AArch64::LDRSHXui:
2774  case AArch64::STRHui:
2775  case AArch64::STRHHui:
2776  Scale = TypeSize::Fixed(2);
2777  Width = 2;
2778  MinOffset = 0;
2779  MaxOffset = 4095;
2780  break;
2781  case AArch64::LDRBui:
2782  case AArch64::LDRBBui:
2783  case AArch64::LDRSBWui:
2784  case AArch64::LDRSBXui:
2785  case AArch64::STRBui:
2786  case AArch64::STRBBui:
2787  Scale = TypeSize::Fixed(1);
2788  Width = 1;
2789  MinOffset = 0;
2790  MaxOffset = 4095;
2791  break;
2792  case AArch64::STPXpre:
2793  case AArch64::LDPXpost:
2794  case AArch64::STPDpre:
2795  case AArch64::LDPDpost:
2796  Scale = TypeSize::Fixed(8);
2797  Width = 8;
2798  MinOffset = -512;
2799  MaxOffset = 504;
2800  break;
2801  case AArch64::STPQpre:
2802  case AArch64::LDPQpost:
2803  Scale = TypeSize::Fixed(16);
2804  Width = 16;
2805  MinOffset = -1024;
2806  MaxOffset = 1008;
2807  break;
2808  case AArch64::STRXpre:
2809  case AArch64::STRDpre:
2810  case AArch64::LDRXpost:
2811  case AArch64::LDRDpost:
2812  Scale = TypeSize::Fixed(1);
2813  Width = 8;
2814  MinOffset = -256;
2815  MaxOffset = 255;
2816  break;
2817  case AArch64::STRQpre:
2818  case AArch64::LDRQpost:
2819  Scale = TypeSize::Fixed(1);
2820  Width = 16;
2821  MinOffset = -256;
2822  MaxOffset = 255;
2823  break;
2824  case AArch64::ADDG:
2825  Scale = TypeSize::Fixed(16);
2826  Width = 0;
2827  MinOffset = 0;
2828  MaxOffset = 63;
2829  break;
2830  case AArch64::TAGPstack:
2831  Scale = TypeSize::Fixed(16);
2832  Width = 0;
2833  // TAGP with a negative offset turns into SUBP, which has a maximum offset
2834  // of 63 (not 64!).
2835  MinOffset = -63;
2836  MaxOffset = 63;
2837  break;
2838  case AArch64::LDG:
2839  case AArch64::STGOffset:
2840  case AArch64::STZGOffset:
2841  Scale = TypeSize::Fixed(16);
2842  Width = 16;
2843  MinOffset = -256;
2844  MaxOffset = 255;
2845  break;
2846  case AArch64::STR_ZZZZXI:
2847  case AArch64::LDR_ZZZZXI:
2848  Scale = TypeSize::Scalable(16);
2849  Width = SVEMaxBytesPerVector * 4;
2850  MinOffset = -256;
2851  MaxOffset = 252;
2852  break;
2853  case AArch64::STR_ZZZXI:
2854  case AArch64::LDR_ZZZXI:
2855  Scale = TypeSize::Scalable(16);
2856  Width = SVEMaxBytesPerVector * 3;
2857  MinOffset = -256;
2858  MaxOffset = 253;
2859  break;
2860  case AArch64::STR_ZZXI:
2861  case AArch64::LDR_ZZXI:
2862  Scale = TypeSize::Scalable(16);
2863  Width = SVEMaxBytesPerVector * 2;
2864  MinOffset = -256;
2865  MaxOffset = 254;
2866  break;
2867  case AArch64::LDR_PXI:
2868  case AArch64::STR_PXI:
2869  Scale = TypeSize::Scalable(2);
2870  Width = SVEMaxBytesPerVector / 8;
2871  MinOffset = -256;
2872  MaxOffset = 255;
2873  break;
2874  case AArch64::LDR_ZXI:
2875  case AArch64::STR_ZXI:
2876  Scale = TypeSize::Scalable(16);
2877  Width = SVEMaxBytesPerVector;
2878  MinOffset = -256;
2879  MaxOffset = 255;
2880  break;
2881  case AArch64::LD1B_IMM:
2882  case AArch64::LD1H_IMM:
2883  case AArch64::LD1W_IMM:
2884  case AArch64::LD1D_IMM:
2885  case AArch64::LDNT1B_ZRI:
2886  case AArch64::LDNT1H_ZRI:
2887  case AArch64::LDNT1W_ZRI:
2888  case AArch64::LDNT1D_ZRI:
2889  case AArch64::ST1B_IMM:
2890  case AArch64::ST1H_IMM:
2891  case AArch64::ST1W_IMM:
2892  case AArch64::ST1D_IMM:
2893  case AArch64::STNT1B_ZRI:
2894  case AArch64::STNT1H_ZRI:
2895  case AArch64::STNT1W_ZRI:
2896  case AArch64::STNT1D_ZRI:
2897  case AArch64::LDNF1B_IMM:
2898  case AArch64::LDNF1H_IMM:
2899  case AArch64::LDNF1W_IMM:
2900  case AArch64::LDNF1D_IMM:
2901  // A full vector's worth of data
2902  // Width = mbytes * elements
2903  Scale = TypeSize::Scalable(16);
2904  Width = SVEMaxBytesPerVector;
2905  MinOffset = -8;
2906  MaxOffset = 7;
2907  break;
2908  case AArch64::LD2B_IMM:
2909  case AArch64::LD2H_IMM:
2910  case AArch64::LD2W_IMM:
2911  case AArch64::LD2D_IMM:
2912  case AArch64::ST2B_IMM:
2913  case AArch64::ST2H_IMM:
2914  case AArch64::ST2W_IMM:
2915  case AArch64::ST2D_IMM:
2916  Scale = TypeSize::Scalable(32);
2917  Width = SVEMaxBytesPerVector * 2;
2918  MinOffset = -8;
2919  MaxOffset = 7;
2920  break;
2921  case AArch64::LD3B_IMM:
2922  case AArch64::LD3H_IMM:
2923  case AArch64::LD3W_IMM:
2924  case AArch64::LD3D_IMM:
2925  case AArch64::ST3B_IMM:
2926  case AArch64::ST3H_IMM:
2927  case AArch64::ST3W_IMM:
2928  case AArch64::ST3D_IMM:
2929  Scale = TypeSize::Scalable(48);
2930  Width = SVEMaxBytesPerVector * 3;
2931  MinOffset = -8;
2932  MaxOffset = 7;
2933  break;
2934  case AArch64::LD4B_IMM:
2935  case AArch64::LD4H_IMM:
2936  case AArch64::LD4W_IMM:
2937  case AArch64::LD4D_IMM:
2938  case AArch64::ST4B_IMM:
2939  case AArch64::ST4H_IMM:
2940  case AArch64::ST4W_IMM:
2941  case AArch64::ST4D_IMM:
2942  Scale = TypeSize::Scalable(64);
2943  Width = SVEMaxBytesPerVector * 4;
2944  MinOffset = -8;
2945  MaxOffset = 7;
2946  break;
2947  case AArch64::LD1B_H_IMM:
2948  case AArch64::LD1SB_H_IMM:
2949  case AArch64::LD1H_S_IMM:
2950  case AArch64::LD1SH_S_IMM:
2951  case AArch64::LD1W_D_IMM:
2952  case AArch64::LD1SW_D_IMM:
2953  case AArch64::ST1B_H_IMM:
2954  case AArch64::ST1H_S_IMM:
2955  case AArch64::ST1W_D_IMM:
2956  case AArch64::LDNF1B_H_IMM:
2957  case AArch64::LDNF1SB_H_IMM:
2958  case AArch64::LDNF1H_S_IMM:
2959  case AArch64::LDNF1SH_S_IMM:
2960  case AArch64::LDNF1W_D_IMM:
2961  case AArch64::LDNF1SW_D_IMM:
2962  // Half a vector's worth of data
2963  // Width = mbytes * elements
2964  Scale = TypeSize::Scalable(8);
2965  Width = SVEMaxBytesPerVector / 2;
2966  MinOffset = -8;
2967  MaxOffset = 7;
2968  break;
2969  case AArch64::LD1B_S_IMM:
2970  case AArch64::LD1SB_S_IMM:
2971  case AArch64::LD1H_D_IMM:
2972  case AArch64::LD1SH_D_IMM:
2973  case AArch64::ST1B_S_IMM:
2974  case AArch64::ST1H_D_IMM:
2975  case AArch64::LDNF1B_S_IMM:
2976  case AArch64::LDNF1SB_S_IMM:
2977  case AArch64::LDNF1H_D_IMM:
2978  case AArch64::LDNF1SH_D_IMM:
2979  // A quarter of a vector's worth of data
2980  // Width = mbytes * elements
2981  Scale = TypeSize::Scalable(4);
2982  Width = SVEMaxBytesPerVector / 4;
2983  MinOffset = -8;
2984  MaxOffset = 7;
2985  break;
2986  case AArch64::LD1B_D_IMM:
2987  case AArch64::LD1SB_D_IMM:
2988  case AArch64::ST1B_D_IMM:
2989  case AArch64::LDNF1B_D_IMM:
2990  case AArch64::LDNF1SB_D_IMM:
2991  // An eighth of a vector's worth of data
2992  // Width = mbytes * elements
2993  Scale = TypeSize::Scalable(2);
2994  Width = SVEMaxBytesPerVector / 8;
2995  MinOffset = -8;
2996  MaxOffset = 7;
2997  break;
2998  case AArch64::ST2GOffset:
2999  case AArch64::STZ2GOffset:
3000  Scale = TypeSize::Fixed(16);
3001  Width = 32;
3002  MinOffset = -256;
3003  MaxOffset = 255;
3004  break;
3005  case AArch64::STGPi:
3006  Scale = TypeSize::Fixed(16);
3007  Width = 16;
3008  MinOffset = -64;
3009  MaxOffset = 63;
3010  break;
3011  case AArch64::LD1RB_IMM:
3012  case AArch64::LD1RB_H_IMM:
3013  case AArch64::LD1RB_S_IMM:
3014  case AArch64::LD1RB_D_IMM:
3015  case AArch64::LD1RSB_H_IMM:
3016  case AArch64::LD1RSB_S_IMM:
3017  case AArch64::LD1RSB_D_IMM:
3018  Scale = TypeSize::Fixed(1);
3019  Width = 1;
3020  MinOffset = 0;
3021  MaxOffset = 63;
3022  break;
3023  case AArch64::LD1RH_IMM:
3024  case AArch64::LD1RH_S_IMM:
3025  case AArch64::LD1RH_D_IMM:
3026  case AArch64::LD1RSH_S_IMM:
3027  case AArch64::LD1RSH_D_IMM:
3028  Scale = TypeSize::Fixed(2);
3029  Width = 2;
3030  MinOffset = 0;
3031  MaxOffset = 63;
3032  break;
3033  case AArch64::LD1RW_IMM:
3034  case AArch64::LD1RW_D_IMM:
3035  case AArch64::LD1RSW_IMM:
3036  Scale = TypeSize::Fixed(4);
3037  Width = 4;
3038  MinOffset = 0;
3039  MaxOffset = 63;
3040  break;
3041  case AArch64::LD1RD_IMM:
3042  Scale = TypeSize::Fixed(8);
3043  Width = 8;
3044  MinOffset = 0;
3045  MaxOffset = 63;
3046  break;
3047  }
3048 
3049  return true;
3050 }
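// For the fixed-size forms above, the encodable byte offsets are simply
// Scale * [MinOffset, MaxOffset]; e.g. LDRXui allows 8 * [0, 4095] =
// [0, 32760] bytes, the same range tested by the stack-guard expansion
// earlier. (Scalable SVE forms scale by a runtime vector granule instead.)
// A small sketch of that bound check, assuming a fixed scale:
// \code
//   bool fitsImmediate(int64_t ByteOffset, int64_t Scale, int64_t MinOff,
//                      int64_t MaxOff) {
//     return ByteOffset % Scale == 0 && ByteOffset / Scale >= MinOff &&
//            ByteOffset / Scale <= MaxOff;
//   }
//   // fitsImmediate(32760, 8, 0, 4095) == true
//   // fitsImmediate(12, 8, 0, 4095)    == false (not a multiple of the scale)
// \endcode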
3051 
3052 // Scaling factor for unscaled load or store.
3054  switch (Opc) {
3055  default:
3056  llvm_unreachable("Opcode has unknown scale!");
3057  case AArch64::LDRBBui:
3058  case AArch64::LDURBBi:
3059  case AArch64::LDRSBWui:
3060  case AArch64::LDURSBWi:
3061  case AArch64::STRBBui:
3062  case AArch64::STURBBi:
3063  return 1;
3064  case AArch64::LDRHHui:
3065  case AArch64::LDURHHi:
3066  case AArch64::LDRSHWui:
3067  case AArch64::LDURSHWi:
3068  case AArch64::STRHHui:
3069  case AArch64::STURHHi:
3070  return 2;
3071  case AArch64::LDRSui:
3072  case AArch64::LDURSi:
3073  case AArch64::LDRSpre:
3074  case AArch64::LDRSWui:
3075  case AArch64::LDURSWi:
3076  case AArch64::LDRWpre:
3077  case AArch64::LDRWui:
3078  case AArch64::LDURWi:
3079  case AArch64::STRSui:
3080  case AArch64::STURSi:
3081  case AArch64::STRSpre:
3082  case AArch64::STRWui:
3083  case AArch64::STURWi:
3084  case AArch64::STRWpre:
3085  case AArch64::LDPSi:
3086  case AArch64::LDPSWi:
3087  case AArch64::LDPWi:
3088  case AArch64::STPSi:
3089  case AArch64::STPWi:
3090  return 4;
3091  case AArch64::LDRDui:
3092  case AArch64::LDURDi:
3093  case AArch64::LDRDpre:
3094  case AArch64::LDRXui:
3095  case AArch64::LDURXi:
3096  case AArch64::LDRXpre:
3097  case AArch64::STRDui:
3098  case AArch64::STURDi:
3099  case AArch64::STRDpre:
3100  case AArch64::STRXui:
3101  case AArch64::STURXi:
3102  case AArch64::STRXpre:
3103  case AArch64::LDPDi:
3104  case AArch64::LDPXi:
3105  case AArch64::STPDi:
3106  case AArch64::STPXi:
3107  return 8;
3108  case AArch64::LDRQui:
3109  case AArch64::LDURQi:
3110  case AArch64::STRQui:
3111  case AArch64::STURQi:
3112  case AArch64::STRQpre:
3113  case AArch64::LDPQi:
3114  case AArch64::LDRQpre:
3115  case AArch64::STPQi:
3116  case AArch64::STGOffset:
3117  case AArch64::STZGOffset:
3118  case AArch64::ST2GOffset:
3119  case AArch64::STZ2GOffset:
3120  case AArch64::STGPi:
3121  return 16;
3122  }
3123 }
3124 
3126  switch (MI.getOpcode()) {
3127  default:
3128  return false;
3129  case AArch64::LDRWpre:
3130  case AArch64::LDRXpre:
3131  case AArch64::LDRSpre:
3132  case AArch64::LDRDpre:
3133  case AArch64::LDRQpre:
3134  return true;
3135  }
3136 }
3137 
3139  switch (MI.getOpcode()) {
3140  default:
3141  return false;
3142  case AArch64::STRWpre:
3143  case AArch64::STRXpre:
3144  case AArch64::STRSpre:
3145  case AArch64::STRDpre:
3146  case AArch64::STRQpre:
3147  return true;
3148  }
3149 }
3150 
3152  return isPreLd(MI) || isPreSt(MI);
3153 }
3154 
3156  switch (MI.getOpcode()) {
3157  default:
3158  return false;
3159  case AArch64::LDPSi:
3160  case AArch64::LDPSWi:
3161  case AArch64::LDPDi:
3162  case AArch64::LDPQi:
3163  case AArch64::LDPWi:
3164  case AArch64::LDPXi:
3165  case AArch64::STPSi:
3166  case AArch64::STPDi:
3167  case AArch64::STPQi:
3168  case AArch64::STPWi:
3169  case AArch64::STPXi:
3170  case AArch64::STGPi:
3171  return true;
3172  }
3173 }
3174 
3175 const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
3176  unsigned Idx =
3177  AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
3178  : 1;
3179  return MI.getOperand(Idx);
3180 }
3181 
3182 const MachineOperand &
3183 AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
3184  unsigned Idx =
3185  AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
3186  : 2;
3187  return MI.getOperand(Idx);
3188 }
3189 
3190 static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
3191  Register Reg) {
3192  if (MI.getParent() == nullptr)
3193  return nullptr;
3194  const MachineFunction *MF = MI.getParent()->getParent();
3195  return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
3196 }
3197 
3199  auto IsQFPR = [&](const MachineOperand &Op) {
3200  if (!Op.isReg())
3201  return false;
3202  auto Reg = Op.getReg();
3203  if (Reg.isPhysical())
3204  return AArch64::FPR128RegClass.contains(Reg);
3205  const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
3206  return TRC == &AArch64::FPR128RegClass ||
3207  TRC == &AArch64::FPR128_loRegClass;
3208  };
3209  return llvm::any_of(MI.operands(), IsQFPR);
3210 }
3211 
3213  auto IsFPR = [&](const MachineOperand &Op) {
3214  if (!Op.isReg())
3215  return false;
3216  auto Reg = Op.getReg();
3217  if (Reg.isPhysical())
3218  return AArch64::FPR128RegClass.contains(Reg) ||
3219  AArch64::FPR64RegClass.contains(Reg) ||
3220  AArch64::FPR32RegClass.contains(Reg) ||
3221  AArch64::FPR16RegClass.contains(Reg) ||
3222  AArch64::FPR8RegClass.contains(Reg);
3223 
3224  const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
3225  return TRC == &AArch64::FPR128RegClass ||
3226  TRC == &AArch64::FPR128_loRegClass ||
3227  TRC == &AArch64::FPR64RegClass ||
3228  TRC == &AArch64::FPR64_loRegClass ||
3229  TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
3230  TRC == &AArch64::FPR8RegClass;
3231  };
3232  return llvm::any_of(MI.operands(), IsFPR);
3233 }
3234 
3235 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
3236 // scaled.
3237 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
3238  int Scale = AArch64InstrInfo::getMemScale(Opc);
3239 
3240  // If the byte-offset isn't a multiple of the stride, we can't scale this
3241  // offset.
3242  if (Offset % Scale != 0)
3243  return false;
3244 
3245  // Convert the byte-offset used by unscaled load/stores into an "element"
3246  // offset used by the scaled pair load/store instructions.
3247  Offset /= Scale;
3248  return true;
3249 }
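// For example, two LDURXi accesses at byte offsets 8 and 16 scale (by 8) to
// element offsets 1 and 2, which is the form the pairing check further down
// compares. A minimal sketch of the same conversion:
// \code
//   bool scaleByteOffset(int64_t &Offset, int Scale) {
//     if (Offset % Scale != 0)
//       return false; // e.g. byte offset 12 is not pairable for an 8-byte access
//     Offset /= Scale;
//     return true;
//   }
// \endcode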
3250 
3251 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
3252  if (FirstOpc == SecondOpc)
3253  return true;
3254  // We can also pair sign-ext and zero-ext instructions.
3255  switch (FirstOpc) {
3256  default:
3257  return false;
3258  case AArch64::LDRWui:
3259  case AArch64::LDURWi:
3260  return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
3261  case AArch64::LDRSWui:
3262  case AArch64::LDURSWi:
3263  return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
3264  }
3265  // These instructions can't be paired based on their opcodes.
3266  return false;
3267 }
3268 
3269 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
3270  int64_t Offset1, unsigned Opcode1, int FI2,
3271  int64_t Offset2, unsigned Opcode2) {
3272  // Accesses through fixed stack object frame indices may access a different
3273  // fixed stack slot. Check that the object offsets + offsets match.
3274  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
3275  int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
3276  int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
3277  assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
3278  // Convert to scaled object offsets.
3279  int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
3280  if (ObjectOffset1 % Scale1 != 0)
3281  return false;
3282  ObjectOffset1 /= Scale1;
3283  int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
3284  if (ObjectOffset2 % Scale2 != 0)
3285  return false;
3286  ObjectOffset2 /= Scale2;
3287  ObjectOffset1 += Offset1;
3288  ObjectOffset2 += Offset2;
3289  return ObjectOffset1 + 1 == ObjectOffset2;
3290  }
3291 
3292  return FI1 == FI2;
3293 }
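// Worked example for the fixed-object path above: two LDRXui accesses through
// fixed objects at byte offsets -16 and -8, each with an instruction offset of
// 0, scale (by 8) to object offsets -2 and -1; since -2 + 0 + 1 == -1 + 0 the
// accesses are adjacent and may be clustered. A compact restatement of that
// check (illustrative only):
// \code
//   bool fixedObjectsAdjacent(int64_t Obj1, int64_t Obj2, int Scale1,
//                             int Scale2, int64_t Off1, int64_t Off2) {
//     if (Obj1 % Scale1 != 0 || Obj2 % Scale2 != 0)
//       return false;
//     return Obj1 / Scale1 + Off1 + 1 == Obj2 / Scale2 + Off2;
//   }
// \endcode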
3294 
3295 /// Detect opportunities for ldp/stp formation.
3296 ///
3297 /// Only called for LdSt for which getMemOperandWithOffset returns true.
3300  ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
3301  unsigned NumBytes) const {
3302  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
3303  const MachineOperand &BaseOp1 = *BaseOps1.front();
3304  const MachineOperand &BaseOp2 = *BaseOps2.front();
3305  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
3306  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
3307  if (BaseOp1.getType() != BaseOp2.getType())
3308  return false;
3309 
3310  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
3311  "Only base registers and frame indices are supported.");
3312 
3313  // Check for both base regs and base FI.
3314  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
3315  return false;
3316 
3317  // Only cluster up to a single pair.
3318  if (NumLoads > 2)
3319  return false;
3320 
3321  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
3322  return false;
3323 
3324  // Can we pair these instructions based on their opcodes?
3325  unsigned FirstOpc = FirstLdSt.getOpcode();
3326  unsigned SecondOpc = SecondLdSt.getOpcode();
3327  if (!canPairLdStOpc(FirstOpc, SecondOpc))
3328  return false;
3329 
3330  // Can't merge volatiles or load/stores that have a hint to avoid pair
3331  // formation, for example.
3332  if (!isCandidateToMergeOrPair(FirstLdSt) ||
3333  !isCandidateToMergeOrPair(SecondLdSt))
3334  return false;
3335 
3336  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
3337  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
3338  if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
3339  return false;
3340 
3341  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
3342  if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
3343  return false;
3344 
3345  // Pairwise instructions have a 7-bit signed offset field.
3346  if (Offset1 > 63 || Offset1 < -64)
3347  return false;
3348 
3349  // The caller should already have ordered First/SecondLdSt by offset.
3350  // Note: except for non-equal frame index bases
3351  if (BaseOp1.isFI()) {
3352  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
3353  "Caller should have ordered offsets.");
3354 
3355  const MachineFrameInfo &MFI =
3356  FirstLdSt.getParent()->getParent()->getFrameInfo();
3357  return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
3358  BaseOp2.getIndex(), Offset2, SecondOpc);
3359  }
3360 
3361  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
3362 
3363  return Offset1 + 1 == Offset2;
3364 }
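// A hedged standalone sketch of the final offset test applied above once the
// opcodes and base operands have been matched (inputs are scaled "element"
// offsets, as produced by scaleOffset for the unscaled forms):
// \code
//   bool offsetsFormPair(int64_t Offset1, int64_t Offset2) {
//     // Paired loads/stores have a 7-bit signed scaled-offset field.
//     if (Offset1 > 63 || Offset1 < -64)
//       return false;
//     // The caller passes the accesses ordered by offset; they must address
//     // adjacent elements to fold into a single ldp/stp.
//     return Offset1 + 1 == Offset2;
//   }
// \endcode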
3365 
3367  unsigned Reg, unsigned SubIdx,
3368  unsigned State,
3369  const TargetRegisterInfo *TRI) {
3370  if (!SubIdx)
3371  return MIB.addReg(Reg, State);
3372 
3374  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
3375  return MIB.addReg(Reg, State, SubIdx);
3376 }
3377 
3378 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
3379  unsigned NumRegs) {
3380  // We really want the positive remainder mod 32 here, which happens to be
3381  // easily obtainable with a mask.
3382  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
3383 }
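// Worked example of the wrap-around check above: copying a 4-register tuple
// whose source encodings are {30, 31, 0, 1} to destinations {0, 1, 2, 3}
// gives (0 - 30) & 0x1f == 2 < 4, so a forward sub-register copy would
// overwrite source registers that are still needed; copyPhysRegTuple below
// iterates in reverse in that case. A standalone restatement:
// \code
//   bool forwardClobbers(unsigned DestEnc, unsigned SrcEnc, unsigned NumRegs) {
//     return ((DestEnc - SrcEnc) & 0x1f) < NumRegs; // positive remainder mod 32
//   }
//   // forwardClobbers(0, 30, 4) == true; forwardClobbers(8, 0, 4) == false
// \endcode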
3384 
3387  const DebugLoc &DL, MCRegister DestReg,
3388  MCRegister SrcReg, bool KillSrc,
3389  unsigned Opcode,
3390  ArrayRef<unsigned> Indices) const {
3391  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
3393  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
3394  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
3395  unsigned NumRegs = Indices.size();
3396 
3397  int SubReg = 0, End = NumRegs, Incr = 1;
3398  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
3399  SubReg = NumRegs - 1;
3400  End = -1;
3401  Incr = -1;
3402  }
3403 
3404  for (; SubReg != End; SubReg += Incr) {
3405  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
3406  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
3407  AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
3408  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
3409  }
3410 }
3411 
3414  DebugLoc DL, unsigned DestReg,
3415  unsigned SrcReg, bool KillSrc,
3416  unsigned Opcode, unsigned ZeroReg,
3417  llvm::ArrayRef<unsigned> Indices) const {
3419  unsigned NumRegs = Indices.size();
3420 
3421 #ifndef NDEBUG
3422  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
3423  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
3424  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
3425  "GPR reg sequences should not be able to overlap");
3426 #endif
3427 
3428  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
3429  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
3430  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
3431  MIB.addReg(ZeroReg);
3432  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
3433  MIB.addImm(0);
3434  }
3435 }
3436 
3439  const DebugLoc &DL, MCRegister DestReg,
3440  MCRegister SrcReg, bool KillSrc) const {
3441  if (AArch64::GPR32spRegClass.contains(DestReg) &&
3442  (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
3444 
3445  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
3446  // If either operand is WSP, expand to ADD #0.
3447  if (Subtarget.hasZeroCycleRegMove()) {
3448  // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
3449  MCRegister DestRegX = TRI->getMatchingSuperReg(
3450  DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3451  MCRegister SrcRegX = TRI->getMatchingSuperReg(
3452  SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3453  // This instruction is reading and writing X registers. This may upset
3454  // the register scavenger and machine verifier, so we need to indicate
3455  // that we are reading an undefined value from SrcRegX, but a proper
3456  // value from SrcReg.
3457  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
3458  .addReg(SrcRegX, RegState::Undef)
3459  .addImm(0)
3461  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
3462  } else {
3463  BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
3464  .addReg(SrcReg, getKillRegState(KillSrc))
3465  .addImm(0)
3467  }
3468  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
3469  BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
3470  .addImm(0)
3472  } else {
3473  if (Subtarget.hasZeroCycleRegMove()) {
3474  // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
3475  MCRegister DestRegX = TRI->getMatchingSuperReg(
3476  DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3477  MCRegister SrcRegX = TRI->getMatchingSuperReg(
3478  SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3479  // This instruction is reading and writing X registers. This may upset
3480  // the register scavenger and machine verifier, so we need to indicate
3481  // that we are reading an undefined value from SrcRegX, but a proper
3482  // value from SrcReg.
3483  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
3484  .addReg(AArch64::XZR)
3485  .addReg(SrcRegX, RegState::Undef)
3486  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
3487  } else {
3488  // Otherwise, expand to ORR WZR.
3489  BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
3490  .addReg(AArch64::WZR)
3491  .addReg(SrcReg, getKillRegState(KillSrc));
3492  }
3493  }
3494  return;
3495  }
3496 
3497  // Copy a Predicate register by ORRing with itself.
3498  if (AArch64::PPRRegClass.contains(DestReg) &&
3499  AArch64::PPRRegClass.contains(SrcReg)) {
3500  assert((Subtarget.hasSVE() || Subtarget.hasSME()) &&
3501  "Unexpected SVE register.");
3502  BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
3503  .addReg(SrcReg) // Pg
3504  .addReg(SrcReg)
3505  .addReg(SrcReg, getKillRegState(KillSrc));
3506  return;
3507  }
3508 
3509  // Copy a Z register by ORRing with itself.
3510  if (AArch64::ZPRRegClass.contains(DestReg) &&
3511  AArch64::ZPRRegClass.contains(SrcReg)) {
3512  assert((Subtarget.hasSVE() || Subtarget.hasSME()) &&
3513  "Unexpected SVE register.");
3514  BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
3515  .addReg(SrcReg)
3516  .addReg(SrcReg, getKillRegState(KillSrc));
3517  return;
3518  }
3519 
3520  // Copy a Z register pair by copying the individual sub-registers.
3521  if (AArch64::ZPR2RegClass.contains(DestReg) &&
3522  AArch64::ZPR2RegClass.contains(SrcReg)) {
3523  assert((Subtarget.hasSVE() || Subtarget.hasSME()) &&
3524  "Unexpected SVE register.");
3525  static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
3526  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
3527  Indices);
3528  return;
3529  }
3530 
3531  // Copy a Z register triple by copying the individual sub-registers.
3532  if (AArch64::ZPR3RegClass.contains(DestReg) &&
3533  AArch64::ZPR3RegClass.contains(SrcReg)) {
3534  assert((Subtarget.hasSVE() || Subtarget.hasSME()) &&
3535  "Unexpected SVE register.");
3536  static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
3537  AArch64::zsub2};
3538  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
3539  Indices);
3540  return;
3541  }
3542 
3543  // Copy a Z register quad by copying the individual sub-registers.
3544  if (AArch64::ZPR4RegClass.contains(DestReg) &&
3545  AArch64::ZPR4RegClass.contains(SrcReg)) {
3546  assert((Subtarget.hasSVE() || Subtarget.hasSME()) &&
3547  "Unexpected SVE register.");
3548  static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
3549  AArch64::zsub2, AArch64::zsub3};
3550  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
3551  Indices);
3552  return;
3553  }
3554 
3555  if (AArch64::GPR64spRegClass.contains(DestReg) &&
3556  (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
3557  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
3558  // If either operand is SP, expand to ADD #0.
3559  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
3560  .addReg(SrcReg, getKillRegState(KillSrc))
3561  .addImm(0)
3563  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
3564  BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
3565  .addImm(0)
3567  } else {
3568  // Otherwise, expand to ORR XZR.
3569  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
3570  .addReg(AArch64::XZR)
3571  .addReg(SrcReg, getKillRegState(KillSrc));
3572  }
3573  return;
3574  }
3575 
3576  // Copy a DDDD register quad by copying the individual sub-registers.
3577  if (AArch64::DDDDRegClass.contains(DestReg) &&
3578  AArch64::DDDDRegClass.contains(SrcReg)) {
3579  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
3580  AArch64::dsub2, AArch64::dsub3};
3581  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
3582  Indices);
3583  return;
3584  }
3585 
3586  // Copy a DDD register triple by copying the individual sub-registers.
3587  if (AArch64::DDDRegClass.contains(DestReg) &&
3588  AArch64::DDDRegClass.contains(SrcReg)) {
3589  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
3590  AArch64::dsub2};
3591  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
3592  Indices);
3593  return;
3594  }
3595 
3596  // Copy a DD register pair by copying the individual sub-registers.
3597  if (AArch64::DDRegClass.contains(DestReg) &&
3598  AArch64::DDRegClass.contains(SrcReg)) {
3599  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
3600  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
3601  Indices);
3602  return;
3603  }
3604 
3605  // Copy a QQQQ register quad by copying the individual sub-registers.
3606  if (AArch64::QQQQRegClass.contains(DestReg) &&
3607  AArch64::QQQQRegClass.contains(SrcReg)) {
3608  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
3609  AArch64::qsub2, AArch64::qsub3};
3610  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
3611  Indices);
3612  return;
3613  }
3614 
3615  // Copy a QQQ register triple by copying the individual sub-registers.
3616  if (AArch64::QQQRegClass.contains(DestReg) &&
3617  AArch64::QQQRegClass.contains(SrcReg)) {
3618  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
3619  AArch64::qsub2};
3620  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
3621  Indices);
3622  return;
3623  }
3624 
3625  // Copy a QQ register pair by copying the individual sub-registers.
3626  if (AArch64::QQRegClass.contains(DestReg) &&
3627  AArch64::QQRegClass.contains(SrcReg)) {
3628  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
3629  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
3630  Indices);
3631  return;
3632  }
3633 
3634  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
3635  AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
3636  static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
3637  copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
3638  AArch64::XZR, Indices);
3639  return;
3640  }
3641 
3642  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
3643  AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
3644  static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
3645  copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
3646  AArch64::WZR, Indices);
3647  return;
3648  }
3649 
3650  if (AArch64::FPR128RegClass.contains(DestReg) &&
3651  AArch64::FPR128RegClass.contains(SrcReg)) {
3652  if (Subtarget.hasNEON()) {
3653  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
3654  .addReg(SrcReg)
3655  .addReg(SrcReg, getKillRegState(KillSrc));
3656  } else {
3657  BuildMI(MBB, I, DL, get(AArch64::STRQpre))
3658  .addReg(AArch64::SP, RegState::Define)
3659  .addReg(SrcReg, getKillRegState(KillSrc))
3660  .addReg(AArch64::SP)
3661  .addImm(-16);
3662  BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
3663  .addReg(AArch64::SP, RegState::Define)
3664  .addReg(DestReg, RegState::Define)
3665  .addReg(AArch64::SP)
3666  .addImm(16);
3667  }
3668  return;
3669  }
3670 
3671  if (AArch64::FPR64RegClass.contains(DestReg) &&
3672  AArch64::FPR64RegClass.contains(SrcReg)) {
3673  BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
3674  .addReg(SrcReg, getKillRegState(KillSrc));
3675  return;
3676  }
3677 
3678  if (AArch64::FPR32RegClass.contains(DestReg) &&
3679  AArch64::FPR32RegClass.contains(SrcReg)) {
3680  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
3681  .addReg(SrcReg, getKillRegState(KillSrc));
3682  return;
3683  }
3684 
3685  if (AArch64::FPR16RegClass.contains(DestReg) &&
3686  AArch64::FPR16RegClass.contains(SrcReg)) {
3687  DestReg =
3688  RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
3689  SrcReg =
3690  RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
3691  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
3692  .addReg(SrcReg, getKillRegState(KillSrc));
3693  return;
3694  }
3695 
3696  if (AArch64::FPR8RegClass.contains(DestReg) &&
3697  AArch64::FPR8RegClass.contains(SrcReg)) {
3698  DestReg =
3699  RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
3700  SrcReg =
3701  RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
3702  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
3703  .addReg(SrcReg, getKillRegState(KillSrc));
3704  return;
3705  }
3706 
3707  // Copies between GPR64 and FPR64.
3708  if (AArch64::FPR64RegClass.contains(DestReg) &&
3709  AArch64::GPR64RegClass.contains(SrcReg)) {
3710  BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
3711  .addReg(SrcReg, getKillRegState(KillSrc));
3712  return;
3713  }
3714  if (AArch64::GPR64RegClass.contains(DestReg) &&
3715  AArch64::FPR64RegClass.contains(SrcReg)) {
3716  BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
3717  .addReg(SrcReg, getKillRegState(KillSrc));
3718  return;
3719  }
3720  // Copies between GPR32 and FPR32.
3721  if (AArch64::FPR32RegClass.contains(DestReg) &&
3722  AArch64::GPR32RegClass.contains(SrcReg)) {
3723  BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
3724  .addReg(SrcReg, getKillRegState(KillSrc));
3725  return;
3726  }
3727  if (AArch64::GPR32RegClass.contains(DestReg) &&
3728  AArch64::FPR32RegClass.contains(SrcReg)) {
3729  BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
3730  .addReg(SrcReg, getKillRegState(KillSrc));
3731  return;
3732  }
3733 
3734  if (DestReg == AArch64::NZCV) {
3735  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
3736  BuildMI(MBB, I, DL, get(AArch64::MSR))
3737  .addImm(AArch64SysReg::NZCV)
3738  .addReg(SrcReg, getKillRegState(KillSrc))
3739  .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
3740  return;
3741  }
3742 
3743  if (SrcReg == AArch64::NZCV) {
3744  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
3745  BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
3746  .addImm(AArch64SysReg::NZCV)
3747  .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
3748  return;
3749  }
3750 
3751 #ifndef NDEBUG
3752  const TargetRegisterInfo &TRI = getRegisterInfo();
3753  errs() << TRI.getRegAsmName(DestReg) << " = COPY "
3754  << TRI.getRegAsmName(SrcReg) << "\n";
3755 #endif
3756  llvm_unreachable("unimplemented reg-to-reg copy");
3757 }
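// --- Illustrative usage sketch (not part of the upstream file) ---
// A pass requests a physical register copy through the TargetInstrInfo hook
// implemented above; for a pair of Q registers with NEON available this
// expands to a single ORRv16i8, otherwise to the STRQpre/LDRQpre round trip
// through the stack. The wrapper name below is hypothetical.
static LLVM_ATTRIBUTE_UNUSED void exampleCopyQRegister(
    const AArch64InstrInfo &TII, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I, const DebugLoc &DL) {
  TII.copyPhysReg(MBB, I, DL, AArch64::Q0, AArch64::Q1, /*KillSrc=*/true);
}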
3758 
3759 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
3760  MachineBasicBlock &MBB,
3761  MachineBasicBlock::iterator InsertBefore,
3762  const MCInstrDesc &MCID,
3763  Register SrcReg, bool IsKill,
3764  unsigned SubIdx0, unsigned SubIdx1, int FI,
3765  MachineMemOperand *MMO) {
3766  Register SrcReg0 = SrcReg;
3767  Register SrcReg1 = SrcReg;
3768  if (Register::isPhysicalRegister(SrcReg)) {
3769  SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
3770  SubIdx0 = 0;
3771  SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
3772  SubIdx1 = 0;
3773  }
3774  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3775  .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
3776  .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
3777  .addFrameIndex(FI)
3778  .addImm(0)
3779  .addMemOperand(MMO);
3780 }
3781 
3782 void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
3783  MachineBasicBlock::iterator MBBI, Register SrcReg,
3784  bool isKill, int FI, const TargetRegisterClass *RC,
3785  const TargetRegisterInfo *TRI) const {
3786  MachineFunction &MF = *MBB.getParent();
3787  MachineFrameInfo &MFI = MF.getFrameInfo();
3788 
3789  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3790  MachineMemOperand *MMO =
3791  MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
3792  MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3793  unsigned Opc = 0;
3794  bool Offset = true;
3795  unsigned StackID = TargetStackID::Default;
3796  switch (TRI->getSpillSize(*RC)) {
3797  case 1:
3798  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3799  Opc = AArch64::STRBui;
3800  break;
3801  case 2:
3802  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3803  Opc = AArch64::STRHui;
3804  else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3805  assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3806  Opc = AArch64::STR_PXI;
3807  StackID = TargetStackID::ScalableVector;
3808  }
3809  break;
3810  case 4:
3811  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3812  Opc = AArch64::STRWui;
3813  if (Register::isVirtualRegister(SrcReg))
3814  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
3815  else
3816  assert(SrcReg != AArch64::WSP);
3817  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3818  Opc = AArch64::STRSui;
3819  break;
3820  case 8:
3821  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3822  Opc = AArch64::STRXui;
3823  if (Register::isVirtualRegister(SrcReg))
3824  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3825  else
3826  assert(SrcReg != AArch64::SP);
3827  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3828  Opc = AArch64::STRDui;
3829  } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3830  storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
3831  get(AArch64::STPWi), SrcReg, isKill,
3832  AArch64::sube32, AArch64::subo32, FI, MMO);
3833  return;
3834  }
3835  break;
3836  case 16:
3837  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3838  Opc = AArch64::STRQui;
3839  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3840  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3841  Opc = AArch64::ST1Twov1d;
3842  Offset = false;
3843  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3844  storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
3845  get(AArch64::STPXi), SrcReg, isKill,
3846  AArch64::sube64, AArch64::subo64, FI, MMO);
3847  return;
3848  } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3849  assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3850  Opc = AArch64::STR_ZXI;
3851  StackID = TargetStackID::ScalableVector;
3852  }
3853  break;
3854  case 24:
3855  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3856  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3857  Opc = AArch64::ST1Threev1d;
3858  Offset = false;
3859  }
3860  break;
3861  case 32:
3862  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3863  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3864  Opc = AArch64::ST1Fourv1d;
3865  Offset = false;
3866  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3867  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3868  Opc = AArch64::ST1Twov2d;
3869  Offset = false;
3870  } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
3871  assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3872  Opc = AArch64::STR_ZZXI;
3873  StackID = TargetStackID::ScalableVector;
3874  }
3875  break;
3876  case 48:
3877  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3878  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3879  Opc = AArch64::ST1Threev2d;
3880  Offset = false;
3881  } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
3882  assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3883  Opc = AArch64::STR_ZZZXI;
3884  StackID = TargetStackID::ScalableVector;
3885  }
3886  break;
3887  case 64:
3888  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3889  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3890  Opc = AArch64::ST1Fourv2d;
3891  Offset = false;
3892  } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
3893  assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3894  Opc = AArch64::STR_ZZZZXI;
3895  StackID = TargetStackID::ScalableVector;
3896  }
3897  break;
3898  }
3899  assert(Opc && "Unknown register class");
3900  MFI.setStackID(FI, StackID);
3901 
3902  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3903  .addReg(SrcReg, getKillRegState(isKill))
3904  .addFrameIndex(FI);
3905 
3906  if (Offset)
3907  MI.addImm(0);
3908  MI.addMemOperand(MMO);
3909 }
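// --- Illustrative usage sketch (not part of the upstream file) ---
// Spilling X19 to an existing stack object: the switch above selects STRXui
// for an 8-byte GPR64 spill and keeps the default (non-scalable) stack ID.
// The wrapper name and the FI argument are hypothetical.
static LLVM_ATTRIBUTE_UNUSED void exampleSpillGPR64(
    const AArch64InstrInfo &TII, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, int FI, const TargetRegisterInfo *TRI) {
  TII.storeRegToStackSlot(MBB, MBBI, AArch64::X19, /*isKill=*/true, FI,
                          &AArch64::GPR64RegClass, TRI);
}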
3910 
3911 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
3912  MachineBasicBlock &MBB,
3913  MachineBasicBlock::iterator InsertBefore,
3914  const MCInstrDesc &MCID,
3915  Register DestReg, unsigned SubIdx0,
3916  unsigned SubIdx1, int FI,
3917  MachineMemOperand *MMO) {
3918  Register DestReg0 = DestReg;
3919  Register DestReg1 = DestReg;
3920  bool IsUndef = true;
3921  if (Register::isPhysicalRegister(DestReg)) {
3922  DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
3923  SubIdx0 = 0;
3924  DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
3925  SubIdx1 = 0;
3926  IsUndef = false;
3927  }
3928  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3929  .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
3930  .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
3931  .addFrameIndex(FI)
3932  .addImm(0)
3933  .addMemOperand(MMO);
3934 }
3935 
3936 void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
3937  MachineBasicBlock::iterator MBBI, Register DestReg,
3938  int FI, const TargetRegisterClass *RC,
3939  const TargetRegisterInfo *TRI) const {
3940  MachineFunction &MF = *MBB.getParent();
3941  MachineFrameInfo &MFI = MF.getFrameInfo();
3942  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3943  MachineMemOperand *MMO =
3944  MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
3945  MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3946 
3947  unsigned Opc = 0;
3948  bool Offset = true;
3949  unsigned StackID = TargetStackID::Default;
3950  switch (TRI->getSpillSize(*RC)) {
3951  case 1:
3952  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3953  Opc = AArch64::LDRBui;
3954  break;
3955  case 2:
3956  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3957  Opc = AArch64::LDRHui;
3958  else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3959  assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3960  Opc = AArch64::LDR_PXI;
3961  StackID = TargetStackID::ScalableVector;
3962  }
3963  break;
3964  case 4:
3965  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3966  Opc = AArch64::LDRWui;
3967  if (Register::isVirtualRegister(DestReg))
3968  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
3969  else
3970  assert(DestReg != AArch64::WSP);
3971  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3972  Opc = AArch64::LDRSui;
3973  break;
3974  case 8:
3975  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3976  Opc = AArch64::LDRXui;
3977  if (Register::isVirtualRegister(DestReg))
3978  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
3979  else
3980  assert(DestReg != AArch64::SP);
3981  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3982  Opc = AArch64::LDRDui;
3983  } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3984  loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3985  get(AArch64::LDPWi), DestReg, AArch64::sube32,
3986  AArch64::subo32, FI, MMO);
3987  return;
3988  }
3989  break;
3990  case 16:
3991  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3992  Opc = AArch64::LDRQui;
3993  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3994  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3995  Opc = AArch64::LD1Twov1d;
3996  Offset = false;
3997  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3998  loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3999  get(AArch64::LDPXi), DestReg, AArch64::sube64,
4000  AArch64::subo64, FI, MMO);
4001  return;
4002  } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4003  assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
4004  Opc = AArch64::LDR_ZXI;
4005  StackID = TargetStackID::ScalableVector;
4006  }
4007  break;
4008  case 24:
4009  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4010  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4011  Opc = AArch64::LD1Threev1d;
4012  Offset = false;
4013  }
4014  break;
4015  case 32:
4016  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4017  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4018  Opc = AArch64::LD1Fourv1d;
4019  Offset = false;
4020  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4021  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4022  Opc = AArch64::LD1Twov2d;
4023  Offset = false;
4024  } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
4025  assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
4026  Opc = AArch64::LDR_ZZXI;
4027  StackID = TargetStackID::ScalableVector;
4028  }
4029  break;
4030  case 48:
4031  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4032  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4033  Opc = AArch64::LD1Threev2d;
4034  Offset = false;
4035  } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4036  assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
4037  Opc = AArch64::LDR_ZZZXI;
4038  StackID = TargetStackID::ScalableVector;
4039  }
4040  break;
4041  case 64:
4042  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4043  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4044  Opc = AArch64::LD1Fourv2d;
4045  Offset = false;
4046  } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
4047  assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
4048  Opc = AArch64::LDR_ZZZZXI;
4049  StackID = TargetStackID::ScalableVector;
4050  }
4051  break;
4052  }
4053 
4054  assert(Opc && "Unknown register class");
4055  MFI.setStackID(FI, StackID);
4056 
4057  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4058  .addReg(DestReg, getDefRegState(true))
4059  .addFrameIndex(FI);
4060  if (Offset)
4061  MI.addImm(0);
4062  MI.addMemOperand(MMO);
4063 }
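// --- Illustrative usage sketch (not part of the upstream file) ---
// Reloading the same 8-byte slot back into X19 goes through LDRXui; the
// wrapper name and the FI argument are hypothetical.
static LLVM_ATTRIBUTE_UNUSED void exampleReloadGPR64(
    const AArch64InstrInfo &TII, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, int FI, const TargetRegisterInfo *TRI) {
  TII.loadRegFromStackSlot(MBB, MBBI, AArch64::X19, FI,
                           &AArch64::GPR64RegClass, TRI);
}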
4064 
4065 static bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
4066  const MachineInstr &UseMI,
4067  const TargetRegisterInfo *TRI) {
4068  return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
4069  UseMI.getIterator()),
4070  [TRI](const MachineInstr &I) {
4071  return I.modifiesRegister(AArch64::NZCV, TRI) ||
4072  I.readsRegister(AArch64::NZCV, TRI);
4073  });
4074 }
4075 
4076 void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
4077  const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
4078  // The smallest scalable elements supported by scaled SVE addressing
4079  // modes are predicates, which are 2 scalable bytes in size. So the scalable
4080  // byte offset must always be a multiple of 2.
4081  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
4082 
4083  // VGSized offsets are divided by '2', because the VG register is the
4084  // number of 64bit granules as opposed to 128bit vector chunks,
4085  // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
4086  // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
4087  // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
4088  ByteSized = Offset.getFixed();
4089  VGSized = Offset.getScalable() / 2;
4090 }
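// --- Illustrative worked example (not part of the upstream file) ---
// For an offset of 16 fixed bytes plus 32 scalable bytes, the helper above
// yields ByteSized = 16 and VGSized = 16 (32 scalable bytes / 2), i.e. the
// DWARF expression describes "16 + 16 * VG". The function name is
// hypothetical.
static LLVM_ATTRIBUTE_UNUSED void exampleDecomposeForDwarf() {
  int64_t ByteSized, VGSized;
  AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
      StackOffset::get(/*Fixed=*/16, /*Scalable=*/32), ByteSized, VGSized);
  assert(ByteSized == 16 && VGSized == 16 && "see the comment above");
  (void)ByteSized;
  (void)VGSized;
}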
4091 
4092 /// Returns the offset in parts to which this frame offset can be
4093 /// decomposed for the purpose of describing a frame offset.
4094 /// For non-scalable offsets this is simply its byte size.
4095 void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
4096  const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
4097  int64_t &NumDataVectors) {
4098  // The smallest scalable elements supported by scaled SVE addressing
4099  // modes are predicates, which are 2 scalable bytes in size. So the scalable
4100  // byte offset must always be a multiple of 2.
4101  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
4102 
4103  NumBytes = Offset.getFixed();
4104  NumDataVectors = 0;
4105  NumPredicateVectors = Offset.getScalable() / 2;
4106  // This method is used to get the offsets to adjust the frame offset.
4107  // If the function requires ADDPL to be used and needs more than two ADDPL
4108  // instructions, part of the offset is folded into NumDataVectors so that it
4109  // uses ADDVL for part of it, reducing the number of ADDPL instructions.
4110  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
4111  NumPredicateVectors > 62) {
4112  NumDataVectors = NumPredicateVectors / 8;
4113  NumPredicateVectors -= NumDataVectors * 8;
4114  }
4115 }
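// --- Illustrative worked example (not part of the upstream file) ---
// 16 scalable bytes is exactly one SVE data vector: the predicate count
// (16 / 2 = 8) is a multiple of 8, so the helper above folds it entirely into
// NumDataVectors (one ADDVL) and leaves no ADDPL part. The function name is
// hypothetical.
static LLVM_ATTRIBUTE_UNUSED void exampleDecomposeForFrame() {
  int64_t NumBytes, NumPredicateVectors, NumDataVectors;
  AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
      StackOffset::get(/*Fixed=*/0, /*Scalable=*/16), NumBytes,
      NumPredicateVectors, NumDataVectors);
  assert(NumBytes == 0 && NumDataVectors == 1 && NumPredicateVectors == 0);
  (void)NumBytes;
  (void)NumPredicateVectors;
  (void)NumDataVectors;
}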
4116 
4117 // Convenience function to create a DWARF expression for
4118 // Expr + NumBytes + NumVGScaledBytes * AArch64::VG
4119 static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
4120  int NumVGScaledBytes, unsigned VG,
4121  llvm::raw_string_ostream &Comment) {
4122  uint8_t buffer[16];
4123 
4124  if (NumBytes) {
4125  Expr.push_back(dwarf::DW_OP_consts);
4126  Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
4127  Expr.push_back((uint8_t)dwarf::DW_OP_plus);
4128  Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
4129  }
4130 
4131  if (NumVGScaledBytes) {
4132  Expr.push_back((uint8_t)dwarf::DW_OP_consts);
4133  Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
4134 
4135  Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
4136  Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
4137  Expr.push_back(0);
4138 
4139  Expr.push_back((uint8_t)dwarf::DW_OP_mul);
4140  Expr.push_back((uint8_t)dwarf::DW_OP_plus);
4141 
4142  Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
4143  << std::abs(NumVGScaledBytes) << " * VG";
4144  }
4145 }
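// Illustrative worked example (not part of the upstream file): for
// NumBytes = 16 and NumVGScaledBytes = 16 the helper above appends
//   DW_OP_consts 16, DW_OP_plus,
//   DW_OP_consts 16, DW_OP_bregx VG 0, DW_OP_mul, DW_OP_plus
// to Expr and extends the comment with " + 16 + 16 * VG".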
4146 
4147 // Creates an MCCFIInstruction:
4148 // { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
4149 static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
4150  unsigned Reg,
4151  const StackOffset &Offset) {
4152  int64_t NumBytes, NumVGScaledBytes;
4153  AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
4154  NumVGScaledBytes);
4155  std::string CommentBuffer;
4156  llvm::raw_string_ostream Comment(CommentBuffer);
4157 
4158  if (Reg == AArch64::SP)
4159  Comment << "sp";
4160  else if (Reg == AArch64::FP)
4161  Comment << "fp";
4162  else
4163  Comment << printReg(Reg, &TRI);
4164 
4165  // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
4166  SmallString<64> Expr;
4167  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
4168  Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
4169  Expr.push_back(0);
4170  appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
4171  TRI.getDwarfRegNum(AArch64::VG, true), Comment);
4172 
4173  // Wrap this into DW_CFA_def_cfa.
4174  SmallString<64> DefCfaExpr;
4175  DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4176  uint8_t buffer[16];
4177  DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
4178  DefCfaExpr.append(Expr.str());
4179  return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(),
4180  Comment.str());
4181 }
4182 
4183 MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
4184  unsigned FrameReg, unsigned Reg,
4185  const StackOffset &Offset,
4186  bool LastAdjustmentWasScalable) {
4187  if (Offset.getScalable())
4188  return createDefCFAExpression(TRI, Reg, Offset);
4189 
4190  if (FrameReg == Reg && !LastAdjustmentWasScalable)
4191  return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
4192 
4193  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
4194  return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
4195 }
4196 
4197 MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
4198  unsigned Reg,
4199  const StackOffset &OffsetFromDefCFA) {
4200  int64_t NumBytes, NumVGScaledBytes;
4201  AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
4202  OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
4203 
4204  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
4205 
4206  // Non-scalable offsets can use DW_CFA_offset directly.
4207  if (!NumVGScaledBytes)
4208  return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
4209 
4210  std::string CommentBuffer;
4211  llvm::raw_string_ostream Comment(CommentBuffer);
4212  Comment << printReg(Reg, &TRI) << " @ cfa";
4213 
4214  // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
4215  SmallString<64> OffsetExpr;
4216  appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
4217  TRI.getDwarfRegNum(AArch64::VG, true), Comment);
4218 
4219  // Wrap this into DW_CFA_expression
4220  SmallString<64> CfaExpr;
4221  CfaExpr.push_back(dwarf::DW_CFA_expression);
4222  uint8_t buffer[16];
4223  CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
4224  CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
4225  CfaExpr.append(OffsetExpr.str());
4226 
4227  return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str());
4228 }
4229 
4230 // Helper function to emit a frame offset adjustment from a given
4231 // pointer (SrcReg), stored into DestReg. This function is explicit
4232 // in that it requires the opcode.
4233 static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
4234  MachineBasicBlock::iterator MBBI,
4235  const DebugLoc &DL, unsigned DestReg,
4236  unsigned SrcReg, int64_t Offset, unsigned Opc,
4237  const TargetInstrInfo *TII,
4238  MachineInstr::MIFlag Flag, bool NeedsWinCFI,
4239  bool *HasWinCFI, bool EmitCFAOffset,
4240  StackOffset CFAOffset, unsigned FrameReg) {
4241  int Sign = 1;
4242  unsigned MaxEncoding, ShiftSize;
4243  switch (Opc) {
4244  case AArch64::ADDXri:
4245  case AArch64::ADDSXri:
4246  case AArch64::SUBXri:
4247  case AArch64::SUBSXri:
4248  MaxEncoding = 0xfff;
4249  ShiftSize = 12;
4250  break;
4251  case AArch64::ADDVL_XXI:
4252  case AArch64::ADDPL_XXI:
4253  MaxEncoding = 31;
4254  ShiftSize = 0;
4255  if (Offset < 0) {
4256  MaxEncoding = 32;
4257  Sign = -1;
4258  Offset = -Offset;
4259  }
4260  break;
4261  default:
4262  llvm_unreachable("Unsupported opcode");
4263  }
4264 
4265  // `Offset` can be in bytes or in "scalable bytes".
4266  int VScale = 1;
4267  if (Opc == AArch64::ADDVL_XXI)
4268  VScale = 16;
4269  else if (Opc == AArch64::ADDPL_XXI)
4270  VScale = 2;
4271 
4272  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
4273  // scratch register. If DestReg is a virtual register, use it as the
4274  // scratch register; otherwise, create a new virtual register (to be
4275  // replaced by the scavenger at the end of PEI). That case can be optimized
4276  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
4277  // register can be loaded with offset%8 and the add/sub can use an extending
4278  // instruction with LSL#3.
4279  // Currently the function handles any offsets but generates a poor sequence
4280  // of code.
4281  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
4282 
4283  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
4284  Register TmpReg = DestReg;
4285  if (TmpReg == AArch64::XZR)
4286  TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
4287  &AArch64::GPR64RegClass);
4288  do {
4289  uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
4290  unsigned LocalShiftSize = 0;
4291  if (ThisVal > MaxEncoding) {
4292  ThisVal = ThisVal >> ShiftSize;
4293  LocalShiftSize = ShiftSize;
4294  }
4295  assert((ThisVal >> ShiftSize) <= MaxEncoding &&
4296  "Encoding cannot handle value that big");
4297 
4298  Offset -= ThisVal << LocalShiftSize;
4299  if (Offset == 0)
4300  TmpReg = DestReg;
4301  auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
4302  .addReg(SrcReg)
4303  .addImm(Sign * (int)ThisVal);
4304  if (ShiftSize)
4305  MBI = MBI.addImm(
4306  AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
4307  MBI = MBI.setMIFlag(Flag);
4308 
4309  auto Change =
4310  VScale == 1
4311  ? StackOffset::getFixed(ThisVal << LocalShiftSize)
4312  : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
4313  if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
4314  CFAOffset += Change;
4315  else
4316  CFAOffset -= Change;
4317  if (EmitCFAOffset && DestReg == TmpReg) {
4318  MachineFunction &MF = *MBB.getParent();
4319  const TargetSubtargetInfo &STI = MF.getSubtarget();
4320  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
4321 
4322  unsigned CFIIndex = MF.addFrameInst(
4323  createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
4324  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
4325  .addCFIIndex(CFIIndex)
4326  .setMIFlags(Flag);
4327  }
4328 
4329  if (NeedsWinCFI) {
4330  assert(Sign == 1 && "SEH directives should always have a positive sign");
4331  int Imm = (int)(ThisVal << LocalShiftSize);
4332  if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
4333  (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
4334  if (HasWinCFI)
4335  *HasWinCFI = true;
4336  if (Imm == 0)
4337  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
4338  else
4339  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
4340  .addImm(Imm)
4341  .setMIFlag(Flag);
4342  assert(Offset == 0 && "Expected remaining offset to be zero to "
4343  "emit a single SEH directive");
4344  } else if (DestReg == AArch64::SP) {
4345  if (HasWinCFI)
4346  *HasWinCFI = true;
4347  assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
4348  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
4349  .addImm(Imm)
4350  .setMIFlag(Flag);
4351  }
4352  if (HasWinCFI)
4353  *HasWinCFI = true;
4354  }
4355 
4356  SrcReg = TmpReg;
4357  } while (Offset);
4358 }
4359 
4360 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
4361  MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
4362  unsigned DestReg, unsigned SrcReg,
4363  StackOffset Offset, const TargetInstrInfo *TII,
4364  MachineInstr::MIFlag Flag, bool SetNZCV,
4365  bool NeedsWinCFI, bool *HasWinCFI,
4366  bool EmitCFAOffset, StackOffset CFAOffset,
4367  unsigned FrameReg) {
4368  int64_t Bytes, NumPredicateVectors, NumDataVectors;
4369  AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
4370  Offset, Bytes, NumPredicateVectors, NumDataVectors);
4371 
4372  // First emit non-scalable frame offsets, or a simple 'mov'.
4373  if (Bytes || (!Offset && SrcReg != DestReg)) {
4374  assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
4375  "SP increment/decrement not 8-byte aligned");
4376  unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
4377  if (Bytes < 0) {
4378  Bytes = -Bytes;
4379  Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
4380  }
4381  emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
4382  NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
4383  FrameReg);
4384  CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
4385  ? StackOffset::getFixed(-Bytes)
4386  : StackOffset::getFixed(Bytes);
4387  SrcReg = DestReg;
4388  FrameReg = DestReg;
4389  }
4390 
4391  assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
4392  "SetNZCV not supported with SVE vectors");
4393  assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
4394  "WinCFI not supported with SVE vectors");
4395 
4396  if (NumDataVectors) {
4397  emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
4398  AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr,
4399  EmitCFAOffset, CFAOffset, FrameReg);
4400  CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
4401  SrcReg = DestReg;
4402  }
4403 
4404  if (NumPredicateVectors) {
4405  assert(DestReg != AArch64::SP && "Unaligned access to SP");
4406  emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
4407  AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr,
4408  EmitCFAOffset, CFAOffset, FrameReg);
4409  }
4410 }
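// --- Illustrative usage sketch (not part of the upstream file) ---
// Allocating 32 bytes of fixed stack in a prologue: a negative fixed offset
// is turned into a single "SUB sp, sp, #32" by the helper above. The wrapper
// name is hypothetical.
static LLVM_ATTRIBUTE_UNUSED void exampleAllocateStack(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII) {
  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                  StackOffset::getFixed(-32), TII, MachineInstr::FrameSetup);
}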
4411 
4412 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
4413  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
4414  MachineBasicBlock::iterator InsertPt, int FrameIndex,
4415  LiveIntervals *LIS, VirtRegMap *VRM) const {
4416  // This is a bit of a hack. Consider this instruction:
4417  //
4418  // %0 = COPY %sp; GPR64all:%0
4419  //
4420  // We explicitly chose GPR64all for the virtual register so such a copy might
4421  // be eliminated by RegisterCoalescer. However, that may not be possible, and
4422  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
4423  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
4424  //
4425  // To prevent that, we are going to constrain the %0 register class here.
4426  //
4427  // <rdar://problem/11522048>
4428  //
4429  if (MI.isFullCopy()) {
4430  Register DstReg = MI.getOperand(0).getReg();
4431  Register SrcReg = MI.getOperand(1).getReg();
4432  if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
4433  MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
4434  return nullptr;
4435  }
4436  if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
4437  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4438  return nullptr;
4439  }
4440  // Nothing can be folded with a copy from/to NZCV.
4441  if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
4442  return nullptr;
4443  }
4444 
4445  // Handle the case where a copy is being spilled or filled but the source
4446  // and destination register classes don't match. For example:
4447  //
4448  // %0 = COPY %xzr; GPR64common:%0
4449  //
4450  // In this case we can still safely fold away the COPY and generate the
4451  // following spill code:
4452  //
4453  // STRXui %xzr, %stack.0
4454  //
4455  // This also eliminates spilled cross register class COPYs (e.g. between x and
4456  // d regs) of the same size. For example:
4457  //
4458  // %0 = COPY %1; GPR64:%0, FPR64:%1
4459  //
4460  // will be filled as
4461  //
4462  // LDRDui %0, fi<#0>
4463  //
4464  // instead of
4465  //
4466  // LDRXui %Temp, fi<#0>
4467  // %0 = FMOV %Temp
4468  //
4469  if (MI.isCopy() && Ops.size() == 1 &&
4470  // Make sure we're only folding the explicit COPY defs/uses.
4471  (Ops[0] == 0 || Ops[0] == 1)) {
4472  bool IsSpill = Ops[0] == 0;
4473  bool IsFill = !IsSpill;
4474  const TargetRegisterInfo &TRI = getRegisterInfo();
4475  const MachineRegisterInfo &MRI = MF.getRegInfo();
4476  MachineBasicBlock &MBB = *MI.getParent();
4477  const MachineOperand &DstMO = MI.getOperand(0);
4478  const MachineOperand &SrcMO = MI.getOperand(1);
4479  Register DstReg = DstMO.getReg();
4480  Register SrcReg = SrcMO.getReg();
4481  // This is slightly expensive to compute for physical regs since
4482  // getMinimalPhysRegClass is slow.
4483  auto getRegClass = [&](unsigned Reg) {
4484  return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
4485  : TRI.getMinimalPhysRegClass(Reg);
4486  };
4487 
4488  if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
4489  assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
4490  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
4491  "Mismatched register size in non subreg COPY");
4492  if (IsSpill)
4493  storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
4494  getRegClass(SrcReg), &TRI);
4495  else
4496  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
4497  getRegClass(DstReg), &TRI);
4498  return &*--InsertPt;
4499  }
4500 
4501  // Handle cases like spilling def of:
4502  //
4503  // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
4504  //
4505  // where the physical register source can be widened and stored to the full
4506  // virtual reg destination stack slot, in this case producing:
4507  //
4508  // STRXui %xzr, %stack.0
4509  //
4510  if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
4511  assert(SrcMO.getSubReg() == 0 &&
4512  "Unexpected subreg on physical register");
4513  const TargetRegisterClass *SpillRC;
4514  unsigned SpillSubreg;
4515  switch (DstMO.getSubReg()) {
4516  default:
4517  SpillRC = nullptr;
4518  break;
4519  case AArch64::sub_32:
4520  case AArch64::ssub:
4521  if (AArch64::GPR32RegClass.contains(SrcReg)) {
4522  SpillRC = &AArch64::GPR64RegClass;
4523  SpillSubreg = AArch64::sub_32;
4524  } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
4525  SpillRC = &AArch64::FPR64RegClass;
4526  SpillSubreg = AArch64::ssub;
4527  } else
4528  SpillRC = nullptr;
4529  break;
4530  case AArch64::dsub:
4531  if (AArch64::FPR64RegClass.contains(SrcReg)) {
4532  SpillRC = &AArch64::FPR128RegClass;
4533  SpillSubreg = AArch64::dsub;
4534  } else
4535  SpillRC = nullptr;
4536  break;
4537  }
4538 
4539  if (SpillRC)
4540  if (unsigned WidenedSrcReg =
4541  TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
4542  storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
4543  FrameIndex, SpillRC, &TRI);
4544  return &*--InsertPt;
4545  }
4546  }
4547 
4548  // Handle cases like filling use of:
4549  //
4550  // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
4551  //
4552  // where we can load the full virtual reg source stack slot, into the subreg
4553  // destination, in this case producing:
4554  //
4555  // LDRWui %0:sub_32<def,read-undef>, %stack.0
4556  //
4557  if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
4558  const TargetRegisterClass *FillRC;
4559  switch (DstMO.getSubReg()) {
4560  default:
4561  FillRC = nullptr;
4562  break;
4563  case AArch64::sub_32:
4564  FillRC = &AArch64::GPR32RegClass;
4565  break;
4566  case AArch64::ssub:
4567  FillRC = &AArch64::FPR32RegClass;
4568  break;
4569  case AArch64::dsub:
4570  FillRC = &AArch64::FPR64RegClass;
4571  break;
4572  }
4573 
4574  if (FillRC) {
4575  assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
4576  TRI.getRegSizeInBits(*FillRC) &&
4577  "Mismatched regclass size on folded subreg COPY");
4578  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
4579  MachineInstr &LoadMI = *--InsertPt;
4580  MachineOperand &LoadDst = LoadMI.getOperand(0);
4581  assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
4582  LoadDst.setSubReg(DstMO.getSubReg());
4583  LoadDst.setIsUndef();
4584  return &LoadMI;
4585  }
4586  }
4587  }
4588 
4589  // Cannot fold.
4590  return nullptr;
4591 }
4592 
4593 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
4594  StackOffset &SOffset,
4595  bool *OutUseUnscaledOp,
4596  unsigned *OutUnscaledOp,
4597  int64_t *EmittableOffset) {
4598  // Set output values in case of early exit.
4599  if (EmittableOffset)
4600  *EmittableOffset = 0;
4601  if (OutUseUnscaledOp)
4602  *OutUseUnscaledOp = false;
4603  if (OutUnscaledOp)
4604  *OutUnscaledOp = 0;
4605 
4606  // Exit early for structured vector spills/fills as they can't take an
4607  // immediate offset.
4608  switch (MI.getOpcode()) {
4609  default:
4610  break;
4611  case AArch64::LD1Twov2d:
4612  case AArch64::LD1Threev2d:
4613  case AArch64::LD1Fourv2d:
4614  case AArch64::LD1Twov1d:
4615  case AArch64::LD1Threev1d:
4616  case AArch64::LD1Fourv1d:
4617  case AArch64::ST1Twov2d:
4618  case AArch64::ST1Threev2d:
4619  case AArch64::ST1Fourv2d:
4620  case AArch64::ST1Twov1d:
4621  case AArch64::ST1Threev1d:
4622  case AArch64::ST1Fourv1d:
4623  case AArch64::ST1i8:
4624  case AArch64::ST1i16:
4625  case AArch64::ST1i32:
4626  case AArch64::ST1i64:
4627  case AArch64::IRG:
4628  case AArch64::IRGstack:
4629  case AArch64::STGloop:
4630  case AArch64::STZGloop:
4631  return AArch64FrameOffsetCannotUpdate;
4632  }
4633 
4634  // Get the min/max offset and the scale.
4635  TypeSize ScaleValue(0U, false);
4636  unsigned Width;
4637  int64_t MinOff, MaxOff;
4638  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
4639  MaxOff))
4640  llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
4641 
4642  // Construct the complete offset.
4643  bool IsMulVL = ScaleValue.isScalable();
4644  unsigned Scale = ScaleValue.getKnownMinSize();
4645  int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
4646 
4647  const MachineOperand &ImmOpnd =
4648  MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
4649  Offset += ImmOpnd.getImm() * Scale;
4650 
4651  // If the offset doesn't match the scale, we rewrite the instruction to
4652  // use the unscaled instruction instead. Likewise, if we have a negative
4653  // offset and there is an unscaled op to use.
4654  Optional<unsigned> UnscaledOp =
4655  AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
4656  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
4657  if (useUnscaledOp &&
4658  !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
4659  MaxOff))
4660  llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
4661 
4662  Scale = ScaleValue.getKnownMinSize();
4663  assert(IsMulVL == ScaleValue.isScalable() &&
4664  "Unscaled opcode has different value for scalable");
4665 
4666  int64_t Remainder = Offset % Scale;
4667  assert(!(Remainder && useUnscaledOp) &&
4668  "Cannot have remainder when using unscaled op");
4669 
4670  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
4671  int64_t NewOffset = Offset / Scale;
4672  if (MinOff <= NewOffset && NewOffset <= MaxOff)
4673  Offset = Remainder;
4674  else {
4675  NewOffset = NewOffset < 0 ? MinOff : MaxOff;
4676  Offset = Offset - NewOffset * Scale + Remainder;
4677  }
4678 
4679  if (EmittableOffset)
4680  *EmittableOffset = NewOffset;
4681  if (OutUseUnscaledOp)
4682  *OutUseUnscaledOp = useUnscaledOp;
4683  if (OutUnscaledOp && UnscaledOp)
4684  *OutUnscaledOp = *UnscaledOp;
4685 
4686  if (IsMulVL)
4687  SOffset = StackOffset::get(SOffset.getFixed(), Offset);
4688  else
4689  SOffset = StackOffset::get(Offset, SOffset.getScalable());
4690  return AArch64FrameOffsetCanUpdate |
4691  (SOffset ? 0 : AArch64FrameOffsetIsLegal);
4692 }
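// Illustrative worked example (not part of the upstream file): for an LDRXui
// (scale 8, immediate range 0..4095) whose immediate operand is currently 0,
// an extra SOffset of 40 fixed bytes gives NewOffset = 40 / 8 = 5 with no
// remainder, so the call above reports the offset as legal, sets
// *EmittableOffset to 5 and leaves SOffset at zero.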
4693 
4694 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
4695  unsigned FrameReg, StackOffset &Offset,
4696  const AArch64InstrInfo *TII) {
4697  unsigned Opcode = MI.getOpcode();
4698  unsigned ImmIdx = FrameRegIdx + 1;
4699 
4700  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
4701  Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
4702  emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
4703  MI.getOperand(0).getReg(), FrameReg, Offset, TII,
4704  MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
4705  MI.eraseFromParent();
4706  Offset = StackOffset();
4707  return true;
4708  }
4709 
4710  int64_t NewOffset;
4711  unsigned UnscaledOp;
4712  bool UseUnscaledOp;
4713  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
4714  &UnscaledOp, &NewOffset);
4715  if (Status & AArch64FrameOffsetCanUpdate) {
4716  if (Status & AArch64FrameOffsetIsLegal)
4717  // Replace the FrameIndex with FrameReg.
4718  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
4719  if (UseUnscaledOp)
4720  MI.setDesc(TII->get(UnscaledOp));
4721 
4722  MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
4723  return !Offset;
4724  }
4725 
4726  return false;
4727 }
4728 
4729 MCInst AArch64InstrInfo::getNop() const {
4730  return MCInstBuilder(AArch64::HINT).addImm(0);
4731 }
4732 
4733 // AArch64 supports MachineCombiner.
4734 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
4735 
4736 // True when Opc sets flag
4737 static bool isCombineInstrSettingFlag(unsigned Opc) {
4738  switch (Opc) {
4739  case AArch64::ADDSWrr:
4740  case AArch64::ADDSWri:
4741  case AArch64::ADDSXrr:
4742  case AArch64::ADDSXri:
4743  case AArch64::SUBSWrr:
4744  case AArch64::SUBSXrr:
4745  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
4746  case AArch64::SUBSWri:
4747  case AArch64::SUBSXri:
4748  return true;
4749  default:
4750  break;
4751  }
4752  return false;
4753 }
4754 
4755 // 32b Opcodes that can be combined with a MUL
4756 static bool isCombineInstrCandidate32(unsigned Opc) {
4757  switch (Opc) {
4758  case AArch64::ADDWrr:
4759  case AArch64::ADDWri:
4760  case AArch64::SUBWrr:
4761  case AArch64::ADDSWrr:
4762  case AArch64::ADDSWri:
4763  case AArch64::SUBSWrr:
4764  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
4765  case AArch64::SUBWri:
4766  case AArch64::SUBSWri:
4767  return true;
4768  default:
4769  break;
4770  }
4771  return false;
4772 }
4773 
4774 // 64b Opcodes that can be combined with a MUL
4775 static bool isCombineInstrCandidate64(unsigned Opc) {
4776  switch (Opc) {
4777  case AArch64::ADDXrr:
4778  case AArch64::ADDXri:
4779  case AArch64::SUBXrr:
4780  case AArch64::ADDSXrr:
4781  case AArch64::ADDSXri:
4782  case AArch64::SUBSXrr:
4783  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
4784  case AArch64::SUBXri:
4785  case AArch64::SUBSXri:
4786  case AArch64::ADDv8i8:
4787  case AArch64::ADDv16i8:
4788  case AArch64::ADDv4i16:
4789  case AArch64::ADDv8i16:
4790  case AArch64::ADDv2i32:
4791  case AArch64::ADDv4i32:
4792  case AArch64::SUBv8i8:
4793  case AArch64::SUBv16i8:
4794  case AArch64::SUBv4i16:
4795  case AArch64::SUBv8i16:
4796  case AArch64::SUBv2i32:
4797  case AArch64::SUBv4i32:
4798  return true;
4799  default:
4800  break;
4801  }
4802  return false;
4803 }
4804 
4805 // FP Opcodes that can be combined with a FMUL.
4806 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
4807  switch (Inst.getOpcode()) {
4808  default:
4809  break;
4810  case AArch64::FADDHrr:
4811  case AArch64::FADDSrr:
4812  case AArch64::FADDDrr:
4813  case AArch64::FADDv4f16:
4814  case AArch64::FADDv8f16:
4815  case AArch64::FADDv2f32:
4816  case AArch64::FADDv2f64:
4817  case AArch64::FADDv4f32:
4818  case AArch64::FSUBHrr:
4819  case AArch64::FSUBSrr:
4820  case AArch64::FSUBDrr:
4821  case AArch64::FSUBv4f16:
4822  case AArch64::FSUBv8f16:
4823  case AArch64::FSUBv2f32:
4824  case AArch64::FSUBv2f64:
4825  case AArch64::FSUBv4f32:
4826  TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
4827  // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
4828  // the target options or if FADD/FSUB has the contract fast-math flag.
4829  return Options.UnsafeFPMath ||
4830  Options.AllowFPOpFusion == FPOpFusion::Fast ||
4831  Inst.getFlag(MachineInstr::FmContract);
4832  return true;
4833  }
4834  return false;
4835 }
4836 
4837 // Opcodes that can be combined with a MUL
4838 static bool isCombineInstrCandidate(unsigned Opc) {
4839  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
4840 }
4841 
4842 //
4843 // Utility routine that checks if \param MO is defined by an
4844 // \param CombineOpc instruction in the basic block \param MBB
4845 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
4846  unsigned CombineOpc, unsigned ZeroReg = 0,
4847  bool CheckZeroReg = false) {
4848  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4849  MachineInstr *MI = nullptr;
4850 
4851  if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
4852  MI = MRI.getUniqueVRegDef(MO.getReg());
4853  // And it needs to be in the trace (otherwise, it won't have a depth).
4854  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
4855  return false;
4856  // Must only be used by the user we combine with.
4857  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
4858  return false;
4859 
4860  if (CheckZeroReg) {
4861  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
4862  MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
4863  MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
4864  // The third input reg must be zero.
4865  if (MI->getOperand(3).getReg() != ZeroReg)
4866  return false;
4867  }
4868 
4869  if (isCombineInstrSettingFlag(CombineOpc) &&
4870  MI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
4871  return false;
4872 
4873  return true;
4874 }
4875 
4876 //
4877 // Is \param MO defined by an integer multiply and can be combined?
4878 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
4879  unsigned MulOpc, unsigned ZeroReg) {
4880  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
4881 }
4882 
4883 //
4884 // Is \param MO defined by a floating-point multiply and can be combined?
4885 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
4886  unsigned MulOpc) {
4887  return canCombine(MBB, MO, MulOpc);
4888 }
4889 
4890 // TODO: There are many more machine instruction opcodes to match:
4891 // 1. Other data types (integer, vectors)
4892 // 2. Other math / logic operations (xor, or)
4893 // 3. Other forms of the same operation (intrinsics and other variants)
4894 bool AArch64InstrInfo::isAssociativeAndCommutative(
4895  const MachineInstr &Inst) const {
4896  switch (Inst.getOpcode()) {
4897  case AArch64::FADDDrr:
4898  case AArch64::FADDSrr:
4899  case AArch64::FADDv2f32:
4900  case AArch64::FADDv2f64:
4901  case AArch64::FADDv4f32:
4902  case AArch64::FMULDrr:
4903  case AArch64::FMULSrr:
4904  case AArch64::FMULX32:
4905  case AArch64::FMULX64:
4906  case AArch64::FMULXv2f32:
4907  case AArch64::FMULXv2f64:
4908  case AArch64::FMULXv4f32:
4909  case AArch64::FMULv2f32:
4910  case AArch64::FMULv2f64:
4911  case AArch64::FMULv4f32:
4912  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
4913  default:
4914  return false;
4915  }
4916 }
4917 
4918 /// Find instructions that can be turned into madd.
4919 static bool getMaddPatterns(MachineInstr &Root,
4920  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
4921  unsigned Opc = Root.getOpcode();
4922  MachineBasicBlock &MBB = *Root.getParent();
4923  bool Found = false;
4924 
4925  if (!isCombineInstrCandidate(Opc))
4926  return false;
4927  if (isCombineInstrSettingFlag(Opc)) {
4928  int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
4929  // When NZCV is live bail out.
4930  if (Cmp_NZCV == -1)
4931  return false;
4932  unsigned NewOpc = convertToNonFlagSettingOpc(Root);
4933  // When opcode can't change bail out.
4934  // CHECKME: do we miss any cases for opcode conversion?
4935  if (NewOpc == Opc)
4936  return false;
4937  Opc = NewOpc;
4938  }
4939 
4940  auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
4941  MachineCombinerPattern Pattern) {
4942  if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
4943  Patterns.push_back(Pattern);
4944  Found = true;
4945  }
4946  };
4947 
4948  auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
4949  if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
4950  Patterns.push_back(Pattern);
4951  Found = true;
4952  }
4953  };
4954 
4955  typedef MachineCombinerPattern MCP;
4956 
4957  switch (Opc) {
4958  default:
4959  break;
4960  case AArch64::ADDWrr:
4961  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4962  "ADDWrr does not have register operands");
4963  setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
4964  setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
4965  break;
4966  case AArch64::ADDXrr:
4967  setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
4968  setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
4969  break;
4970  case AArch64::SUBWrr:
4971  setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
4972  setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
4973  break;
4974  case AArch64::SUBXrr:
4975  setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
4976  setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
4977  break;
4978  case AArch64::ADDWri:
4979  setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
4980  break;
4981  case AArch64::ADDXri:
4982  setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
4983  break;
4984  case AArch64::SUBWri:
4985  setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
4986  break;
4987  case AArch64::SUBXri:
4988  setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
4989  break;
4990  case AArch64::ADDv8i8:
4991  setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
4992  setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
4993  break;
4994  case AArch64::ADDv16i8:
4995  setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
4996  setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
4997  break;
4998  case AArch64::ADDv4i16:
4999  setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
5000  setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
5001  setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
5002  setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
5003  break;
5004  case AArch64::ADDv8i16:
5005  setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
5006  setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
5007  setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
5008  setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
5009  break;
5010  case AArch64::ADDv2i32:
5011  setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
5012  setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
5013  setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
5014  setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
5015  break;
5016  case AArch64::ADDv4i32:
5017  setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
5018  setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
5019  setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
5020  setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
5021  break;
5022  case AArch64::SUBv8i8:
5023  setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
5024  setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
5025  break;
5026  case AArch64::SUBv16i8:
5027  setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
5028  setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
5029  break;
5030  case AArch64::SUBv4i16:
5031  setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
5032  setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
5033  setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
5034  setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
5035  break;
5036  case AArch64::SUBv8i16:
5037  setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
5038  setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
5039  setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
5040  setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
5041  break;
5042  case AArch64::SUBv2i32:
5043  setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
5044  setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
5045  setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
5046  setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
5047  break;
5048  case AArch64::SUBv4i32:
5049  setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
5050  setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
5051  setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
5052  setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
5053  break;
5054  }
5055  return Found;
5056 }
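// Illustrative worked example (not part of the upstream file): for
//   %3:gpr32 = MULWrr %1:gpr32, %2:gpr32
//   %4:gpr32 = ADDWrr %0:gpr32, %3:gpr32
// the helper above records MULADDW_OP2 (the MUL feeds the second ADD operand),
// and the machine combiner later rewrites the pair into
//   %4:gpr32 = MADDWrrr %1:gpr32, %2:gpr32, %0:gpr32
// provided the MUL result has no other non-debug use.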
5057 /// Floating-Point Support
5058 
5059 /// Find instructions that can be turned into madd.
5060 static bool getFMAPatterns(MachineInstr &Root,
5061  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
5062 
5063  if (!isCombineInstrCandidateFP(Root))
5064  return false;
5065 
5066  MachineBasicBlock &MBB = *Root.getParent();
5067  bool Found = false;
5068 
5069  auto Match = [&](int Opcode, int Operand,
5070  MachineCombinerPattern Pattern) -> bool {
5071  if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
5072  Patterns.push_back(Pattern);
5073  return true;
5074  }
5075  return false;
5076  };
5077 
5078  typedef MachineCombinerPattern MCP;
5079 
5080  switch (Root.getOpcode()) {
5081  default:
5082  assert(false && "Unsupported FP instruction in combiner\n");
5083  break;
5084  case AArch64::FADDHrr:
5085  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
5086  "FADDHrr does not have register operands");
5087 
5088  Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
5089  Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
5090  break;
5091  case AArch64::FADDSrr:
5092  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
5093  "FADDSrr does not have register operands");
5094 
5095  Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
5096  Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
5097 
5098  Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
5099  Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
5100  break;
5101  case AArch64::FADDDrr:
5102  Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
5103  Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
5104 
5105  Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
5106  Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
5107  break;
5108  case AArch64::FADDv4f16:
5109  Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
5110  Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
5111 
5112  Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
5113  Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
5114  break;
5115  case AArch64::FADDv8f16:
5116  Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
5117  Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
5118 
5119  Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
5120  Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
5121  break;
5122  case AArch64::FADDv2f32:
5123  Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
5124  Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
5125 
5126  Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
5127  Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
5128  break;
5129  case AArch64::FADDv2f64:
5130  Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
5131  Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
5132 
5133  Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
5134  Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
5135  break;
5136  case AArch64::FADDv4f32:
5137  Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
5138  Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
5139 
5140  Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
5141  Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
5142  break;
5143  case AArch64::FSUBHrr:
5144  Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
5145  Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
5146  Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
5147  break;
5148  case AArch64::FSUBSrr:
5149  Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
5150 
5151  Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
5152  Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
5153 
5154  Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
5155  break;
5156  case AArch64::FSUBDrr:
5157  Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
5158 
5159  Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
5160  Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
5161 
5162  Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
5163  break;
5164  case AArch64::FSUBv4f16:
5165  Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
5166  Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
5167 
5168  Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
5169  Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
5170  break;
5171  case AArch64::FSUBv8f16:
5172  Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
5173  Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
5174 
5175  Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
5176  Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
5177  break;
5178  case AArch64::FSUBv2f32:
5179  Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
5180  Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
5181 
5182  Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
5183  Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
5184  break;
5185  case AArch64::FSUBv2f64:
5186  Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
5187  Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
5188 
5189  Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
5190  Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
5191  break;
5192  case AArch64::FSUBv4f32:
5193  Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
5194  Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
5195 
5196  Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
5197  Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
5198  break;
5199  }
5200  return Found;
5201 }
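// Illustrative worked example (not part of the upstream file): for
//   %3:fpr32 = FMULSrr %1:fpr32, %2:fpr32
//   %4:fpr32 = contract FADDSrr %0:fpr32, %3:fpr32
// the helper above records FMULADDS_OP2, which the combiner later turns into
// a single fused
//   %4:fpr32 = FMADDSrrr %1:fpr32, %2:fpr32, %0:fpr32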
5202 
5203 static bool getFMULPatterns(MachineInstr &Root,
5204  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
5205  MachineBasicBlock &MBB = *Root.getParent();
5206  bool Found = false;
5207 
5208  auto Match = [&](unsigned Opcode, int Operand,
5209  MachineCombinerPattern Pattern) -> bool {
5210  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5211  MachineOperand &MO = Root.getOperand(Operand);
5212  MachineInstr *MI = nullptr;
5213  if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
5214  MI = MRI.getUniqueVRegDef(MO.getReg());
5215  // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
5216  if (MI && MI->getOpcode() == TargetOpcode::COPY &&
5217  MI->getOperand(1).getReg().isVirtual())
5218  MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
5219  if (MI && MI->getOpcode() == Opcode) {
5220  Patterns.push_back(Pattern);
5221  return true;
5222  }
5223  return false;
5224  };
5225 
5226  typedef MachineCombinerPattern MCP;
5227 
5228  switch (Root.getOpcode()) {
5229  default:
5230  return false;
5231  case AArch64::FMULv2f32:
5232  Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
5233  Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
5234  break;
5235  case AArch64::FMULv2f64:
5236  Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
5237  Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
5238  break;
5239  case AArch64::FMULv4f16:
5240  Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
5241  Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
5242  break;
5243  case AArch64::FMULv4f32:
5244  Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
5245  Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
5246  break;
5247  case AArch64::FMULv8f16:
5248  Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
5249  Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
5250  break;
5251  }
5252 
5253  return Found;
5254 }
5255 
5256 /// Return true when a code sequence can improve throughput. It
5257 /// should be called only for instructions in loops.
5258 /// \param Pattern - combiner pattern
5259 bool AArch64InstrInfo::isThroughputPattern(
5260  MachineCombinerPattern Pattern) const {
5261  switch (Pattern) {
5262  default:
5263  break;
5305  case M