1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "Utils/AArch64BaseInfo.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/CodeGen/StackMaps.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCInst.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CodeGen.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Compiler.h"
46 #include <cassert>
47 #include <cstdint>
48 #include <iterator>
49 #include <utility>
50 
51 using namespace llvm;
52 
53 #define GET_INSTRINFO_CTOR_DTOR
54 #include "AArch64GenInstrInfo.inc"
55 
57  "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
58  cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
59 
61  "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
62  cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
63 
64 static cl::opt<unsigned>
65  BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
66  cl::desc("Restrict range of Bcc instructions (DEBUG)"));
67 
68 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
69  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
70  RI(STI.getTargetTriple()), Subtarget(STI) {}
71 
72 /// GetInstSize - Return the number of bytes of code the specified
73 /// instruction may be. This returns the maximum number of bytes.
74 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
75  const MachineBasicBlock &MBB = *MI.getParent();
76  const MachineFunction *MF = MBB.getParent();
77  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
78 
79  if (MI.getOpcode() == AArch64::INLINEASM)
80  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
81 
82  // FIXME: We currently only handle pseudoinstructions that don't get expanded
83  // before the assembly printer.
84  unsigned NumBytes = 0;
85  const MCInstrDesc &Desc = MI.getDesc();
86  switch (Desc.getOpcode()) {
87  default:
88  // Anything not explicitly designated otherwise is a normal 4-byte insn.
89  NumBytes = 4;
90  break;
91  case TargetOpcode::DBG_VALUE:
92  case TargetOpcode::EH_LABEL:
93  case TargetOpcode::IMPLICIT_DEF:
94  case TargetOpcode::KILL:
95  NumBytes = 0;
96  break;
97  case TargetOpcode::STACKMAP:
98  // The upper bound for a stackmap intrinsic is the full length of its shadow
99  NumBytes = StackMapOpers(&MI).getNumPatchBytes();
100  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
101  break;
102  case TargetOpcode::PATCHPOINT:
103  // The size of the patchpoint intrinsic is the number of bytes requested
104  NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
105  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
106  break;
107  case AArch64::TLSDESC_CALLSEQ:
108  // This gets lowered to an instruction sequence which takes 16 bytes
109  NumBytes = 16;
110  break;
111  }
112 
113  return NumBytes;
114 }
115 
116 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
117  SmallVectorImpl<MachineOperand> &Cond) {
118  // Block ends with fall-through condbranch.
119  switch (LastInst->getOpcode()) {
120  default:
121  llvm_unreachable("Unknown branch instruction?");
122  case AArch64::Bcc:
123  Target = LastInst->getOperand(1).getMBB();
124  Cond.push_back(LastInst->getOperand(0));
125  break;
126  case AArch64::CBZW:
127  case AArch64::CBZX:
128  case AArch64::CBNZW:
129  case AArch64::CBNZX:
130  Target = LastInst->getOperand(1).getMBB();
131  Cond.push_back(MachineOperand::CreateImm(-1));
132  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
133  Cond.push_back(LastInst->getOperand(0));
134  break;
135  case AArch64::TBZW:
136  case AArch64::TBZX:
137  case AArch64::TBNZW:
138  case AArch64::TBNZX:
139  Target = LastInst->getOperand(2).getMBB();
140  Cond.push_back(MachineOperand::CreateImm(-1));
141  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
142  Cond.push_back(LastInst->getOperand(0));
143  Cond.push_back(LastInst->getOperand(1));
144  }
145 }
146 
147 static unsigned getBranchDisplacementBits(unsigned Opc) {
148  switch (Opc) {
149  default:
150  llvm_unreachable("unexpected opcode!");
151  case AArch64::B:
152  return 64;
153  case AArch64::TBNZW:
154  case AArch64::TBZW:
155  case AArch64::TBNZX:
156  case AArch64::TBZX:
157  return TBZDisplacementBits;
158  case AArch64::CBNZW:
159  case AArch64::CBZW:
160  case AArch64::CBNZX:
161  case AArch64::CBZX:
162  return CBZDisplacementBits;
163  case AArch64::Bcc:
164  return BCCDisplacementBits;
165  }
166 }
167 
168 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
169  int64_t BrOffset) const {
170  unsigned Bits = getBranchDisplacementBits(BranchOp);
171  assert(Bits >= 3 && "max branch displacement must be enough to jump"
172  "over conditional branch expansion");
173  return isIntN(Bits, BrOffset / 4);
174 }
175 
176 MachineBasicBlock *
177 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
178  switch (MI.getOpcode()) {
179  default:
180  llvm_unreachable("unexpected opcode!");
181  case AArch64::B:
182  return MI.getOperand(0).getMBB();
183  case AArch64::TBZW:
184  case AArch64::TBNZW:
185  case AArch64::TBZX:
186  case AArch64::TBNZX:
187  return MI.getOperand(2).getMBB();
188  case AArch64::CBZW:
189  case AArch64::CBNZW:
190  case AArch64::CBZX:
191  case AArch64::CBNZX:
192  case AArch64::Bcc:
193  return MI.getOperand(1).getMBB();
194  }
195 }
196 
197 // Branch analysis.
198 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
199  MachineBasicBlock *&TBB,
200  MachineBasicBlock *&FBB,
201  SmallVectorImpl<MachineOperand> &Cond,
202  bool AllowModify) const {
203  // If the block has no terminators, it just falls into the block after it.
204  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
205  if (I == MBB.end())
206  return false;
207 
208  if (!isUnpredicatedTerminator(*I))
209  return false;
210 
211  // Get the last instruction in the block.
212  MachineInstr *LastInst = &*I;
213 
214  // If there is only one terminator instruction, process it.
215  unsigned LastOpc = LastInst->getOpcode();
216  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
217  if (isUncondBranchOpcode(LastOpc)) {
218  TBB = LastInst->getOperand(0).getMBB();
219  return false;
220  }
221  if (isCondBranchOpcode(LastOpc)) {
222  // Block ends with fall-through condbranch.
223  parseCondBranch(LastInst, TBB, Cond);
224  return false;
225  }
226  return true; // Can't handle indirect branch.
227  }
228 
229  // Get the instruction before it if it is a terminator.
230  MachineInstr *SecondLastInst = &*I;
231  unsigned SecondLastOpc = SecondLastInst->getOpcode();
232 
233  // If AllowModify is true and the block ends with two or more unconditional
234  // branches, delete all but the first unconditional branch.
235  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
236  while (isUncondBranchOpcode(SecondLastOpc)) {
237  LastInst->eraseFromParent();
238  LastInst = SecondLastInst;
239  LastOpc = LastInst->getOpcode();
240  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
241  // Return now; the only terminator is an unconditional branch.
242  TBB = LastInst->getOperand(0).getMBB();
243  return false;
244  } else {
245  SecondLastInst = &*I;
246  SecondLastOpc = SecondLastInst->getOpcode();
247  }
248  }
249  }
250 
251  // If there are three terminators, we don't know what sort of block this is.
252  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
253  return true;
254 
255  // If the block ends with a B and a Bcc, handle it.
256  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
257  parseCondBranch(SecondLastInst, TBB, Cond);
258  FBB = LastInst->getOperand(0).getMBB();
259  return false;
260  }
261 
262  // If the block ends with two unconditional branches, handle it. The second
263  // one is not executed, so remove it.
264  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
265  TBB = SecondLastInst->getOperand(0).getMBB();
266  I = LastInst;
267  if (AllowModify)
268  I->eraseFromParent();
269  return false;
270  }
271 
272  // ...likewise if it ends with an indirect branch followed by an unconditional
273  // branch.
274  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
275  I = LastInst;
276  if (AllowModify)
277  I->eraseFromParent();
278  return true;
279  }
280 
281  // Otherwise, can't handle this.
282  return true;
283 }
284 
285 bool AArch64InstrInfo::reverseBranchCondition(
286  SmallVectorImpl<MachineOperand> &Cond) const {
287  if (Cond[0].getImm() != -1) {
288  // Regular Bcc
289  AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
290  Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
291  } else {
292  // Folded compare-and-branch
293  switch (Cond[1].getImm()) {
294  default:
295  llvm_unreachable("Unknown conditional branch!");
296  case AArch64::CBZW:
297  Cond[1].setImm(AArch64::CBNZW);
298  break;
299  case AArch64::CBNZW:
300  Cond[1].setImm(AArch64::CBZW);
301  break;
302  case AArch64::CBZX:
303  Cond[1].setImm(AArch64::CBNZX);
304  break;
305  case AArch64::CBNZX:
306  Cond[1].setImm(AArch64::CBZX);
307  break;
308  case AArch64::TBZW:
309  Cond[1].setImm(AArch64::TBNZW);
310  break;
311  case AArch64::TBNZW:
312  Cond[1].setImm(AArch64::TBZW);
313  break;
314  case AArch64::TBZX:
315  Cond[1].setImm(AArch64::TBNZX);
316  break;
317  case AArch64::TBNZX:
318  Cond[1].setImm(AArch64::TBZX);
319  break;
320  }
321  }
322 
323  return false;
324 }
325 
326 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
327  int *BytesRemoved) const {
328  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
329  if (I == MBB.end())
330  return 0;
331 
332  if (!isUncondBranchOpcode(I->getOpcode()) &&
333  !isCondBranchOpcode(I->getOpcode()))
334  return 0;
335 
336  // Remove the branch.
337  I->eraseFromParent();
338 
339  I = MBB.end();
340 
341  if (I == MBB.begin()) {
342  if (BytesRemoved)
343  *BytesRemoved = 4;
344  return 1;
345  }
346  --I;
347  if (!isCondBranchOpcode(I->getOpcode())) {
348  if (BytesRemoved)
349  *BytesRemoved = 4;
350  return 1;
351  }
352 
353  // Remove the branch.
354  I->eraseFromParent();
355  if (BytesRemoved)
356  *BytesRemoved = 8;
357 
358  return 2;
359 }
360 
361 void AArch64InstrInfo::instantiateCondBranch(
362  MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
363  ArrayRef<MachineOperand> Cond) const {
364  if (Cond[0].getImm() != -1) {
365  // Regular Bcc
366  BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
367  } else {
368  // Folded compare-and-branch
369  // Note that we use addOperand instead of addReg to keep the flags.
370  const MachineInstrBuilder MIB =
371  BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
372  if (Cond.size() > 3)
373  MIB.addImm(Cond[3].getImm());
374  MIB.addMBB(TBB);
375  }
376 }
377 
378 unsigned AArch64InstrInfo::insertBranch(
379  MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
380  ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
381  // Shouldn't be a fall through.
382  assert(TBB && "insertBranch must not be told to insert a fallthrough");
383 
384  if (!FBB) {
385  if (Cond.empty()) // Unconditional branch?
386  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
387  else
388  instantiateCondBranch(MBB, DL, TBB, Cond);
389 
390  if (BytesAdded)
391  *BytesAdded = 4;
392 
393  return 1;
394  }
395 
396  // Two-way conditional branch.
397  instantiateCondBranch(MBB, DL, TBB, Cond);
398  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
399 
400  if (BytesAdded)
401  *BytesAdded = 8;
402 
403  return 2;
404 }
405 
406 // Find the original register that VReg is copied from.
407 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
408  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
409  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
410  if (!DefMI->isFullCopy())
411  return VReg;
412  VReg = DefMI->getOperand(1).getReg();
413  }
414  return VReg;
415 }
416 
417 // Determine if VReg is defined by an instruction that can be folded into a
418 // csel instruction. If so, return the folded opcode, and the replacement
419 // register.
420 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
421  unsigned *NewVReg = nullptr) {
422  VReg = removeCopies(MRI, VReg);
423  if (!TargetRegisterInfo::isVirtualRegister(VReg))
424  return 0;
425 
426  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
427  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
428  unsigned Opc = 0;
429  unsigned SrcOpNum = 0;
430  switch (DefMI->getOpcode()) {
431  case AArch64::ADDSXri:
432  case AArch64::ADDSWri:
433  // if NZCV is used, do not fold.
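 // (Querying for a *dead* def: a return of -1 means the NZCV def is live,
 // i.e. consumed by a later instruction, so folding is unsafe.)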
434  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
435  return 0;
436  // fall-through to ADDXri and ADDWri.
437  LLVM_FALLTHROUGH;
438  case AArch64::ADDXri:
439  case AArch64::ADDWri:
440  // add x, 1 -> csinc.
441  if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
442  DefMI->getOperand(3).getImm() != 0)
443  return 0;
444  SrcOpNum = 1;
445  Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
446  break;
447 
448  case AArch64::ORNXrr:
449  case AArch64::ORNWrr: {
450  // not x -> csinv, represented as orn dst, xzr, src.
451  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
452  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
453  return 0;
454  SrcOpNum = 2;
455  Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
456  break;
457  }
458 
459  case AArch64::SUBSXrr:
460  case AArch64::SUBSWrr:
461  // if NZCV is used, do not fold.
462  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
463  return 0;
464  // fall-through to SUBXrr and SUBWrr.
465  LLVM_FALLTHROUGH;
466  case AArch64::SUBXrr:
467  case AArch64::SUBWrr: {
468  // neg x -> csneg, represented as sub dst, xzr, src.
469  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
470  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
471  return 0;
472  SrcOpNum = 2;
473  Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
474  break;
475  }
476  default:
477  return 0;
478  }
479  assert(Opc && SrcOpNum && "Missing parameters");
480 
481  if (NewVReg)
482  *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
483  return Opc;
484 }
485 
486 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
487  ArrayRef<MachineOperand> Cond,
488  unsigned TrueReg, unsigned FalseReg,
489  int &CondCycles, int &TrueCycles,
490  int &FalseCycles) const {
491  // Check register classes.
492  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
493  const TargetRegisterClass *RC =
494  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
495  if (!RC)
496  return false;
497 
498  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
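 // A single-operand Cond is a plain Bcc condition; larger Cond vectors are
 // the folded cbz/tbz forms built by parseCondBranch above.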
499  unsigned ExtraCondLat = Cond.size() != 1;
500 
501  // GPRs are handled by csel.
502  // FIXME: Fold in x+1, -x, and ~x when applicable.
503  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
504  AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
505  // Single-cycle csel, csinc, csinv, and csneg.
506  CondCycles = 1 + ExtraCondLat;
507  TrueCycles = FalseCycles = 1;
508  if (canFoldIntoCSel(MRI, TrueReg))
509  TrueCycles = 0;
510  else if (canFoldIntoCSel(MRI, FalseReg))
511  FalseCycles = 0;
512  return true;
513  }
514 
515  // Scalar floating point is handled by fcsel.
516  // FIXME: Form fabs, fmin, and fmax when applicable.
517  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
518  AArch64::FPR32RegClass.hasSubClassEq(RC)) {
519  CondCycles = 5 + ExtraCondLat;
520  TrueCycles = FalseCycles = 2;
521  return true;
522  }
523 
524  // Can't do vectors.
525  return false;
526 }
527 
528 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
529  MachineBasicBlock::iterator I,
530  const DebugLoc &DL, unsigned DstReg,
531  ArrayRef<MachineOperand> Cond,
532  unsigned TrueReg, unsigned FalseReg) const {
533  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
534 
535  // Parse the condition code, see parseCondBranch() above.
536  AArch64CC::CondCode CC;
537  switch (Cond.size()) {
538  default:
539  llvm_unreachable("Unknown condition opcode in Cond");
540  case 1: // b.cc
541  CC = AArch64CC::CondCode(Cond[0].getImm());
542  break;
543  case 3: { // cbz/cbnz
544  // We must insert a compare against 0.
545  bool Is64Bit;
546  switch (Cond[1].getImm()) {
547  default:
548  llvm_unreachable("Unknown branch opcode in Cond");
549  case AArch64::CBZW:
550  Is64Bit = false;
551  CC = AArch64CC::EQ;
552  break;
553  case AArch64::CBZX:
554  Is64Bit = true;
555  CC = AArch64CC::EQ;
556  break;
557  case AArch64::CBNZW:
558  Is64Bit = false;
559  CC = AArch64CC::NE;
560  break;
561  case AArch64::CBNZX:
562  Is64Bit = true;
563  CC = AArch64CC::NE;
564  break;
565  }
566  unsigned SrcReg = Cond[2].getReg();
567  if (Is64Bit) {
568  // cmp reg, #0 is actually subs xzr, reg, #0.
569  MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
570  BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
571  .addReg(SrcReg)
572  .addImm(0)
573  .addImm(0);
574  } else {
575  MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
576  BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
577  .addReg(SrcReg)
578  .addImm(0)
579  .addImm(0);
580  }
581  break;
582  }
583  case 4: { // tbz/tbnz
584  // We must insert a tst instruction.
585  switch (Cond[1].getImm()) {
586  default:
587  llvm_unreachable("Unknown branch opcode in Cond");
588  case AArch64::TBZW:
589  case AArch64::TBZX:
590  CC = AArch64CC::EQ;
591  break;
592  case AArch64::TBNZW:
593  case AArch64::TBNZX:
594  CC = AArch64CC::NE;
595  break;
596  }
597  // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
598  if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
599  BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
600  .addReg(Cond[2].getReg())
601  .addImm(
602  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
603  else
604  BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
605  .addReg(Cond[2].getReg())
606  .addImm(
607  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
608  break;
609  }
610  }
611 
612  unsigned Opc = 0;
613  const TargetRegisterClass *RC = nullptr;
614  bool TryFold = false;
615  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
616  RC = &AArch64::GPR64RegClass;
617  Opc = AArch64::CSELXr;
618  TryFold = true;
619  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
620  RC = &AArch64::GPR32RegClass;
621  Opc = AArch64::CSELWr;
622  TryFold = true;
623  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
624  RC = &AArch64::FPR64RegClass;
625  Opc = AArch64::FCSELDrrr;
626  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
627  RC = &AArch64::FPR32RegClass;
628  Opc = AArch64::FCSELSrrr;
629  }
630  assert(RC && "Unsupported regclass");
631 
632  // Try folding simple instructions into the csel.
633  if (TryFold) {
634  unsigned NewVReg = 0;
635  unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
636  if (FoldedOpc) {
637  // The folded opcodes csinc, csinv, and csneg apply the operation to
638  // FalseReg, so we need to invert the condition.
639  CC = AArch64CC::getInvertedCondCode(CC);
640  TrueReg = FalseReg;
641  } else
642  FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
643 
644  // Fold the operation. Leave any dead instructions for DCE to clean up.
645  if (FoldedOpc) {
646  FalseReg = NewVReg;
647  Opc = FoldedOpc;
648  // This extends the live range of NewVReg.
649  MRI.clearKillFlags(NewVReg);
650  }
651  }
652 
653  // Pull all virtual registers into the appropriate class.
654  MRI.constrainRegClass(TrueReg, RC);
655  MRI.constrainRegClass(FalseReg, RC);
656 
657  // Insert the csel.
658  BuildMI(MBB, I, DL, get(Opc), DstReg)
659  .addReg(TrueReg)
660  .addReg(FalseReg)
661  .addImm(CC);
662 }
663 
664 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
665 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
666  uint64_t Imm = MI.getOperand(1).getImm();
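 // Shift the value up and back down (unsigned, so zero-filling) to keep only
 // the low BitSize bits before testing the logical-immediate encoding.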
667  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
668  uint64_t Encoding;
669  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
670 }
671 
672 // FIXME: this implementation should be micro-architecture dependent, so a
673 // micro-architecture target hook should be introduced here in future.
674 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
675  if (!Subtarget.hasCustomCheapAsMoveHandling())
676  return MI.isAsCheapAsAMove();
677  if (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
679  return true;
680 
681  switch (MI.getOpcode()) {
682  default:
683  return false;
684 
685  // add/sub on register without shift
686  case AArch64::ADDWri:
687  case AArch64::ADDXri:
688  case AArch64::SUBWri:
689  case AArch64::SUBXri:
690  return (MI.getOperand(3).getImm() == 0);
691 
692  // logical ops on immediate
693  case AArch64::ANDWri:
694  case AArch64::ANDXri:
695  case AArch64::EORWri:
696  case AArch64::EORXri:
697  case AArch64::ORRWri:
698  case AArch64::ORRXri:
699  return true;
700 
701  // logical ops on register without shift
702  case AArch64::ANDWrr:
703  case AArch64::ANDXrr:
704  case AArch64::BICWrr:
705  case AArch64::BICXrr:
706  case AArch64::EONWrr:
707  case AArch64::EONXrr:
708  case AArch64::EORWrr:
709  case AArch64::EORXrr:
710  case AArch64::ORNWrr:
711  case AArch64::ORNXrr:
712  case AArch64::ORRWrr:
713  case AArch64::ORRXrr:
714  return true;
715 
716  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
717  // ORRXri, it is as cheap as MOV
718  case AArch64::MOVi32imm:
719  return canBeExpandedToORR(MI, 32);
720  case AArch64::MOVi64imm:
721  return canBeExpandedToORR(MI, 64);
722 
723  // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
724  // feature.
725  case AArch64::FMOVH0:
726  case AArch64::FMOVS0:
727  case AArch64::FMOVD0:
728  return Subtarget.hasZeroCycleZeroing();
729  case TargetOpcode::COPY:
730  return (Subtarget.hasZeroCycleZeroing() &&
731  (MI.getOperand(1).getReg() == AArch64::WZR ||
732  MI.getOperand(1).getReg() == AArch64::XZR));
733  }
734 
735  llvm_unreachable("Unknown opcode to check as cheap as a move!");
736 }
737 
739  unsigned Imm, Shift;
740  AArch64_AM::ShiftExtendType Ext;
741 
742  switch (MI.getOpcode()) {
743  default:
744  return false;
745 
746  // WriteI
747  case AArch64::ADDSWri:
748  case AArch64::ADDSXri:
749  case AArch64::ADDWri:
750  case AArch64::ADDXri:
751  case AArch64::SUBSWri:
752  case AArch64::SUBSXri:
753  case AArch64::SUBWri:
754  case AArch64::SUBXri:
755  return true;
756 
757  // WriteISReg
758  case AArch64::ADDSWrs:
759  case AArch64::ADDSXrs:
760  case AArch64::ADDWrs:
761  case AArch64::ADDXrs:
762  case AArch64::ANDSWrs:
763  case AArch64::ANDSXrs:
764  case AArch64::ANDWrs:
765  case AArch64::ANDXrs:
766  case AArch64::BICSWrs:
767  case AArch64::BICSXrs:
768  case AArch64::BICWrs:
769  case AArch64::BICXrs:
770  case AArch64::EONWrs:
771  case AArch64::EONXrs:
772  case AArch64::EORWrs:
773  case AArch64::EORXrs:
774  case AArch64::ORNWrs:
775  case AArch64::ORNXrs:
776  case AArch64::ORRWrs:
777  case AArch64::ORRXrs:
778  case AArch64::SUBSWrs:
779  case AArch64::SUBSXrs:
780  case AArch64::SUBWrs:
781  case AArch64::SUBXrs:
782  Imm = MI.getOperand(3).getImm();
783  Shift = AArch64_AM::getShiftValue(Imm);
784  Ext = AArch64_AM::getShiftType(Imm);
785  return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
786 
787  // WriteIEReg
788  case AArch64::ADDSWrx:
789  case AArch64::ADDSXrx:
790  case AArch64::ADDSXrx64:
791  case AArch64::ADDWrx:
792  case AArch64::ADDXrx:
793  case AArch64::ADDXrx64:
794  case AArch64::SUBSWrx:
795  case AArch64::SUBSXrx:
796  case AArch64::SUBSXrx64:
797  case AArch64::SUBWrx:
798  case AArch64::SUBXrx:
799  case AArch64::SUBXrx64:
800  Imm = MI.getOperand(3).getImm();
801  Shift = AArch64_AM::getArithShiftValue(Imm);
802  Ext = AArch64_AM::getArithExtendType(Imm);
803  return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
804 
805  case AArch64::PRFMroW:
806  case AArch64::PRFMroX:
807 
808  // WriteLDIdx
809  case AArch64::LDRBBroW:
810  case AArch64::LDRBBroX:
811  case AArch64::LDRHHroW:
812  case AArch64::LDRHHroX:
813  case AArch64::LDRSBWroW:
814  case AArch64::LDRSBWroX:
815  case AArch64::LDRSBXroW:
816  case AArch64::LDRSBXroX:
817  case AArch64::LDRSHWroW:
818  case AArch64::LDRSHWroX:
819  case AArch64::LDRSHXroW:
820  case AArch64::LDRSHXroX:
821  case AArch64::LDRSWroW:
822  case AArch64::LDRSWroX:
823  case AArch64::LDRWroW:
824  case AArch64::LDRWroX:
825  case AArch64::LDRXroW:
826  case AArch64::LDRXroX:
827 
828  case AArch64::LDRBroW:
829  case AArch64::LDRBroX:
830  case AArch64::LDRDroW:
831  case AArch64::LDRDroX:
832  case AArch64::LDRHroW:
833  case AArch64::LDRHroX:
834  case AArch64::LDRSroW:
835  case AArch64::LDRSroX:
836 
837  // WriteSTIdx
838  case AArch64::STRBBroW:
839  case AArch64::STRBBroX:
840  case AArch64::STRHHroW:
841  case AArch64::STRHHroX:
842  case AArch64::STRWroW:
843  case AArch64::STRWroX:
844  case AArch64::STRXroW:
845  case AArch64::STRXroX:
846 
847  case AArch64::STRBroW:
848  case AArch64::STRBroX:
849  case AArch64::STRDroW:
850  case AArch64::STRDroX:
851  case AArch64::STRHroW:
852  case AArch64::STRHroX:
853  case AArch64::STRSroW:
854  case AArch64::STRSroX:
855  Imm = MI.getOperand(3).getImm();
856  Ext = AArch64_AM::getMemExtendType(Imm);
857  return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
858  }
859 }
860 
862  switch (MI.getOpcode()) {
863  default:
864  return false;
865 
866  case AArch64::ADDWrs:
867  case AArch64::ADDXrs:
868  case AArch64::ADDSWrs:
869  case AArch64::ADDSXrs: {
870  unsigned Imm = MI.getOperand(3).getImm();
871  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
872  if (ShiftVal == 0)
873  return true;
874  return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
875  }
876 
877  case AArch64::ADDWrx:
878  case AArch64::ADDXrx:
879  case AArch64::ADDXrx64:
880  case AArch64::ADDSWrx:
881  case AArch64::ADDSXrx:
882  case AArch64::ADDSXrx64: {
883  unsigned Imm = MI.getOperand(3).getImm();
884  switch (AArch64_AM::getArithExtendType(Imm)) {
885  default:
886  return false;
887  case AArch64_AM::UXTB:
888  case AArch64_AM::UXTH:
889  case AArch64_AM::UXTW:
890  case AArch64_AM::UXTX:
891  return AArch64_AM::getArithShiftValue(Imm) <= 4;
892  }
893  }
894 
895  case AArch64::SUBWrs:
896  case AArch64::SUBSWrs: {
897  unsigned Imm = MI.getOperand(3).getImm();
898  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
899  return ShiftVal == 0 ||
900  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
901  }
902 
903  case AArch64::SUBXrs:
904  case AArch64::SUBSXrs: {
905  unsigned Imm = MI.getOperand(3).getImm();
906  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
907  return ShiftVal == 0 ||
908  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
909  }
910 
911  case AArch64::SUBWrx:
912  case AArch64::SUBXrx:
913  case AArch64::SUBXrx64:
914  case AArch64::SUBSWrx:
915  case AArch64::SUBSXrx:
916  case AArch64::SUBSXrx64: {
917  unsigned Imm = MI.getOperand(3).getImm();
918  switch (AArch64_AM::getArithExtendType(Imm)) {
919  default:
920  return false;
921  case AArch64_AM::UXTB:
922  case AArch64_AM::UXTH:
923  case AArch64_AM::UXTW:
924  case AArch64_AM::UXTX:
925  return AArch64_AM::getArithShiftValue(Imm) == 0;
926  }
927  }
928 
929  case AArch64::LDRBBroW:
930  case AArch64::LDRBBroX:
931  case AArch64::LDRBroW:
932  case AArch64::LDRBroX:
933  case AArch64::LDRDroW:
934  case AArch64::LDRDroX:
935  case AArch64::LDRHHroW:
936  case AArch64::LDRHHroX:
937  case AArch64::LDRHroW:
938  case AArch64::LDRHroX:
939  case AArch64::LDRQroW:
940  case AArch64::LDRQroX:
941  case AArch64::LDRSBWroW:
942  case AArch64::LDRSBWroX:
943  case AArch64::LDRSBXroW:
944  case AArch64::LDRSBXroX:
945  case AArch64::LDRSHWroW:
946  case AArch64::LDRSHWroX:
947  case AArch64::LDRSHXroW:
948  case AArch64::LDRSHXroX:
949  case AArch64::LDRSWroW:
950  case AArch64::LDRSWroX:
951  case AArch64::LDRSroW:
952  case AArch64::LDRSroX:
953  case AArch64::LDRWroW:
954  case AArch64::LDRWroX:
955  case AArch64::LDRXroW:
956  case AArch64::LDRXroX:
957  case AArch64::PRFMroW:
958  case AArch64::PRFMroX:
959  case AArch64::STRBBroW:
960  case AArch64::STRBBroX:
961  case AArch64::STRBroW:
962  case AArch64::STRBroX:
963  case AArch64::STRDroW:
964  case AArch64::STRDroX:
965  case AArch64::STRHHroW:
966  case AArch64::STRHHroX:
967  case AArch64::STRHroW:
968  case AArch64::STRHroX:
969  case AArch64::STRQroW:
970  case AArch64::STRQroX:
971  case AArch64::STRSroW:
972  case AArch64::STRSroX:
973  case AArch64::STRWroW:
974  case AArch64::STRWroX:
975  case AArch64::STRXroW:
976  case AArch64::STRXroX: {
977  unsigned IsSigned = MI.getOperand(3).getImm();
978  return !IsSigned;
979  }
980  }
981 }
982 
983 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
984  unsigned &SrcReg, unsigned &DstReg,
985  unsigned &SubIdx) const {
986  switch (MI.getOpcode()) {
987  default:
988  return false;
989  case AArch64::SBFMXri: // aka sxtw
990  case AArch64::UBFMXri: // aka uxtw
991  // Check for the 32 -> 64 bit extension case, these instructions can do
992  // much more.
993  if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
994  return false;
995  // This is a signed or unsigned 32 -> 64 bit extension.
996  SrcReg = MI.getOperand(1).getReg();
997  DstReg = MI.getOperand(0).getReg();
998  SubIdx = AArch64::sub_32;
999  return true;
1000  }
1001 }
1002 
1003 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1004  MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
1005  const TargetRegisterInfo *TRI = &getRegisterInfo();
1006  unsigned BaseRegA = 0, BaseRegB = 0;
1007  int64_t OffsetA = 0, OffsetB = 0;
1008  unsigned WidthA = 0, WidthB = 0;
1009 
1010  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1011  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1012 
1013  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1014  MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1015  return false;
1016 
1017  // Retrieve the base register, offset from the base register and width. Width
1018  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1019  // base registers are identical, and the offset of a lower memory access +
1020  // the width doesn't overlap the offset of a higher memory access,
1021  // then the memory accesses are different.
1022  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
1023  getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
1024  if (BaseRegA == BaseRegB) {
1025  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1026  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1027  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1028  if (LowOffset + LowWidth <= HighOffset)
1029  return true;
1030  }
1031  }
1032  return false;
1033 }
1034 
1035 /// analyzeCompare - For a comparison instruction, return the source registers
1036 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1037 /// Return true if the comparison instruction can be analyzed.
1038 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
1039  unsigned &SrcReg2, int &CmpMask,
1040  int &CmpValue) const {
1041  // The first operand can be a frame index where we'd normally expect a
1042  // register.
1043  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1044  if (!MI.getOperand(1).isReg())
1045  return false;
1046 
1047  switch (MI.getOpcode()) {
1048  default:
1049  break;
1050  case AArch64::SUBSWrr:
1051  case AArch64::SUBSWrs:
1052  case AArch64::SUBSWrx:
1053  case AArch64::SUBSXrr:
1054  case AArch64::SUBSXrs:
1055  case AArch64::SUBSXrx:
1056  case AArch64::ADDSWrr:
1057  case AArch64::ADDSWrs:
1058  case AArch64::ADDSWrx:
1059  case AArch64::ADDSXrr:
1060  case AArch64::ADDSXrs:
1061  case AArch64::ADDSXrx:
1062  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1063  SrcReg = MI.getOperand(1).getReg();
1064  SrcReg2 = MI.getOperand(2).getReg();
1065  CmpMask = ~0;
1066  CmpValue = 0;
1067  return true;
1068  case AArch64::SUBSWri:
1069  case AArch64::ADDSWri:
1070  case AArch64::SUBSXri:
1071  case AArch64::ADDSXri:
1072  SrcReg = MI.getOperand(1).getReg();
1073  SrcReg2 = 0;
1074  CmpMask = ~0;
1075  // FIXME: In order to convert CmpValue to 0 or 1
1076  CmpValue = MI.getOperand(2).getImm() != 0;
1077  return true;
1078  case AArch64::ANDSWri:
1079  case AArch64::ANDSXri:
1080  // ANDS does not use the same encoding scheme as the others xxxS
1081  // instructions.
1082  SrcReg = MI.getOperand(1).getReg();
1083  SrcReg2 = 0;
1084  CmpMask = ~0;
1085  // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
1086  // while the type of CmpValue is int. When converting uint64_t to int,
1087  // the high 32 bits of uint64_t will be lost.
1088  // In fact it causes a bug in spec2006-483.xalancbmk
1089  // CmpValue is only used to compare with zero in OptimizeCompareInstr
1090  CmpValue = AArch64_AM::decodeLogicalImmediate(
1091  MI.getOperand(2).getImm(),
1092  MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1093  return true;
1094  }
1095 
1096  return false;
1097 }
1098 
1099 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1100  MachineBasicBlock *MBB = Instr.getParent();
1101  assert(MBB && "Can't get MachineBasicBlock here");
1102  MachineFunction *MF = MBB->getParent();
1103  assert(MF && "Can't get MachineFunction here");
1104  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1105  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1106  MachineRegisterInfo *MRI = &MF->getRegInfo();
1107 
1108  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1109  ++OpIdx) {
1110  MachineOperand &MO = Instr.getOperand(OpIdx);
1111  const TargetRegisterClass *OpRegCstraints =
1112  Instr.getRegClassConstraint(OpIdx, TII, TRI);
1113 
1114  // If there's no constraint, there's nothing to do.
1115  if (!OpRegCstraints)
1116  continue;
1117  // If the operand is a frame index, there's nothing to do here.
1118  // A frame index operand will resolve correctly during PEI.
1119  if (MO.isFI())
1120  continue;
1121 
1122  assert(MO.isReg() &&
1123  "Operand has register constraints without being a register!");
1124 
1125  unsigned Reg = MO.getReg();
1126  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1127  if (!OpRegCstraints->contains(Reg))
1128  return false;
1129  } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1130  !MRI->constrainRegClass(Reg, OpRegCstraints))
1131  return false;
1132  }
1133 
1134  return true;
1135 }
1136 
1137 /// \brief Return the opcode that does not set flags when possible - otherwise
1138 /// return the original opcode. The caller is responsible to do the actual
1139 /// substitution and legality checking.
1140 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1141  // Don't convert all compare instructions, because for some the zero register
1142  // encoding becomes the sp register.
1143  bool MIDefinesZeroReg = false;
1144  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1145  MIDefinesZeroReg = true;
1146 
1147  switch (MI.getOpcode()) {
1148  default:
1149  return MI.getOpcode();
1150  case AArch64::ADDSWrr:
1151  return AArch64::ADDWrr;
1152  case AArch64::ADDSWri:
1153  return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1154  case AArch64::ADDSWrs:
1155  return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1156  case AArch64::ADDSWrx:
1157  return AArch64::ADDWrx;
1158  case AArch64::ADDSXrr:
1159  return AArch64::ADDXrr;
1160  case AArch64::ADDSXri:
1161  return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1162  case AArch64::ADDSXrs:
1163  return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1164  case AArch64::ADDSXrx:
1165  return AArch64::ADDXrx;
1166  case AArch64::SUBSWrr:
1167  return AArch64::SUBWrr;
1168  case AArch64::SUBSWri:
1169  return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1170  case AArch64::SUBSWrs:
1171  return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1172  case AArch64::SUBSWrx:
1173  return AArch64::SUBWrx;
1174  case AArch64::SUBSXrr:
1175  return AArch64::SUBXrr;
1176  case AArch64::SUBSXri:
1177  return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1178  case AArch64::SUBSXrs:
1179  return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1180  case AArch64::SUBSXrx:
1181  return AArch64::SUBXrx;
1182  }
1183 }
1184 
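// Bitmask values, so callers can test AccessToCheck with a bitwise AND
// against AK_Write and AK_Read individually.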
1185 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1186 
1187 /// True when condition flags are accessed (either by writing or reading)
1188 /// on the instruction trace starting at From and ending at To.
1189 ///
1190 /// Note: If From and To are from different blocks it's assumed CC are accessed
1191 /// on the path.
1192 static bool areCFlagsAccessedBetweenInstrs(
1193  MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1194  const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1195  // Early exit if To is at the beginning of the BB.
1196  if (To == To->getParent()->begin())
1197  return true;
1198 
1199  // Check whether the instructions are in the same basic block
1200  // If not, assume the condition flags might get modified somewhere.
1201  if (To->getParent() != From->getParent())
1202  return true;
1203 
1204  // From must be above To.
1205  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1206  [From](MachineInstr &MI) {
1207  return MI.getIterator() == From;
1208  }) != To->getParent()->rend());
1209 
1210  // We iterate backward starting \p To until we hit \p From.
1211  for (--To; To != From; --To) {
1212  const MachineInstr &Instr = *To;
1213 
1214  if (((AccessToCheck & AK_Write) &&
1215  Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1216  ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1217  return true;
1218  }
1219  return false;
1220 }
1221 
1222 /// Try to optimize a compare instruction. A compare instruction is an
1223 /// instruction which produces AArch64::NZCV. It is a true compare
1224 /// instruction when there are no uses of its destination register.
1226 ///
1227 /// The following steps are tried in order:
1228 /// 1. Convert CmpInstr into an unconditional version.
1229 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1230 /// condition code or an instruction which can be converted into such an
1231 /// instruction.
1232 /// Only comparison with zero is supported.
1233 bool AArch64InstrInfo::optimizeCompareInstr(
1234  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1235  int CmpValue, const MachineRegisterInfo *MRI) const {
1236  assert(CmpInstr.getParent());
1237  assert(MRI);
1238 
1239  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1240  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1241  if (DeadNZCVIdx != -1) {
1242  if (CmpInstr.definesRegister(AArch64::WZR) ||
1243  CmpInstr.definesRegister(AArch64::XZR)) {
1244  CmpInstr.eraseFromParent();
1245  return true;
1246  }
1247  unsigned Opc = CmpInstr.getOpcode();
1248  unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1249  if (NewOpc == Opc)
1250  return false;
1251  const MCInstrDesc &MCID = get(NewOpc);
1252  CmpInstr.setDesc(MCID);
1253  CmpInstr.RemoveOperand(DeadNZCVIdx);
1254  bool succeeded = UpdateOperandRegClass(CmpInstr);
1255  (void)succeeded;
1256  assert(succeeded && "Some operands reg class are incompatible!");
1257  return true;
1258  }
1259 
1260  // Continue only if we have a "ri" where immediate is zero.
1261  // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1262  // function.
1263  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1264  if (CmpValue != 0 || SrcReg2 != 0)
1265  return false;
1266 
1267  // CmpInstr is a Compare instruction if destination register is not used.
1268  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1269  return false;
1270 
1271  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1272 }
1273 
1274 /// Get opcode of S version of Instr.
1275 /// If Instr is S version its opcode is returned.
1276 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1277 /// or we are not interested in it.
1278 static unsigned sForm(MachineInstr &Instr) {
1279  switch (Instr.getOpcode()) {
1280  default:
1281  return AArch64::INSTRUCTION_LIST_END;
1282 
1283  case AArch64::ADDSWrr:
1284  case AArch64::ADDSWri:
1285  case AArch64::ADDSXrr:
1286  case AArch64::ADDSXri:
1287  case AArch64::SUBSWrr:
1288  case AArch64::SUBSWri:
1289  case AArch64::SUBSXrr:
1290  case AArch64::SUBSXri:
1291  return Instr.getOpcode();
1292 
1293  case AArch64::ADDWrr:
1294  return AArch64::ADDSWrr;
1295  case AArch64::ADDWri:
1296  return AArch64::ADDSWri;
1297  case AArch64::ADDXrr:
1298  return AArch64::ADDSXrr;
1299  case AArch64::ADDXri:
1300  return AArch64::ADDSXri;
1301  case AArch64::ADCWr:
1302  return AArch64::ADCSWr;
1303  case AArch64::ADCXr:
1304  return AArch64::ADCSXr;
1305  case AArch64::SUBWrr:
1306  return AArch64::SUBSWrr;
1307  case AArch64::SUBWri:
1308  return AArch64::SUBSWri;
1309  case AArch64::SUBXrr:
1310  return AArch64::SUBSXrr;
1311  case AArch64::SUBXri:
1312  return AArch64::SUBSXri;
1313  case AArch64::SBCWr:
1314  return AArch64::SBCSWr;
1315  case AArch64::SBCXr:
1316  return AArch64::SBCSXr;
1317  case AArch64::ANDWri:
1318  return AArch64::ANDSWri;
1319  case AArch64::ANDXri:
1320  return AArch64::ANDSXri;
1321  }
1322 }
1323 
1324 /// Check if AArch64::NZCV should be alive in successors of MBB.
1325 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1326  for (auto *BB : MBB->successors())
1327  if (BB->isLiveIn(AArch64::NZCV))
1328  return true;
1329  return false;
1330 }
1331 
1332 namespace {
1333 
1334 struct UsedNZCV {
1335  bool N = false;
1336  bool Z = false;
1337  bool C = false;
1338  bool V = false;
1339 
1340  UsedNZCV() = default;
1341 
1342  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1343  this->N |= UsedFlags.N;
1344  this->Z |= UsedFlags.Z;
1345  this->C |= UsedFlags.C;
1346  this->V |= UsedFlags.V;
1347  return *this;
1348  }
1349 };
1350 
1351 } // end anonymous namespace
1352 
1353 /// Find a condition code used by the instruction.
1354 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1355 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1356 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1357  switch (Instr.getOpcode()) {
1358  default:
1359  return AArch64CC::Invalid;
1360 
1361  case AArch64::Bcc: {
1362  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1363  assert(Idx >= 2);
1364  return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1365  }
1366 
1367  case AArch64::CSINVWr:
1368  case AArch64::CSINVXr:
1369  case AArch64::CSINCWr:
1370  case AArch64::CSINCXr:
1371  case AArch64::CSELWr:
1372  case AArch64::CSELXr:
1373  case AArch64::CSNEGWr:
1374  case AArch64::CSNEGXr:
1375  case AArch64::FCSELSrrr:
1376  case AArch64::FCSELDrrr: {
1377  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1378  assert(Idx >= 1);
1379  return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1380  }
1381  }
1382 }
1383 
1384 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1385  assert(CC != AArch64CC::Invalid);
1386  UsedNZCV UsedFlags;
1387  switch (CC) {
1388  default:
1389  break;
1390 
1391  case AArch64CC::EQ: // Z set
1392  case AArch64CC::NE: // Z clear
1393  UsedFlags.Z = true;
1394  break;
1395 
1396  case AArch64CC::HI: // Z clear and C set
1397  case AArch64CC::LS: // Z set or C clear
1398  UsedFlags.Z = true;
1399  LLVM_FALLTHROUGH;
1400  case AArch64CC::HS: // C set
1401  case AArch64CC::LO: // C clear
1402  UsedFlags.C = true;
1403  break;
1404 
1405  case AArch64CC::MI: // N set
1406  case AArch64CC::PL: // N clear
1407  UsedFlags.N = true;
1408  break;
1409 
1410  case AArch64CC::VS: // V set
1411  case AArch64CC::VC: // V clear
1412  UsedFlags.V = true;
1413  break;
1414 
1415  case AArch64CC::GT: // Z clear, N and V the same
1416  case AArch64CC::LE: // Z set, N and V differ
1417  UsedFlags.Z = true;
1418  LLVM_FALLTHROUGH;
1419  case AArch64CC::GE: // N and V the same
1420  case AArch64CC::LT: // N and V differ
1421  UsedFlags.N = true;
1422  UsedFlags.V = true;
1423  break;
1424  }
1425  return UsedFlags;
1426 }
1427 
1428 static bool isADDSRegImm(unsigned Opcode) {
1429  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1430 }
1431 
1432 static bool isSUBSRegImm(unsigned Opcode) {
1433  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1434 }
1435 
1436 /// Check if CmpInstr can be substituted by MI.
1437 ///
1438 /// CmpInstr can be substituted:
1439 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1440 /// - and, MI and CmpInstr are from the same MachineBB
1441 /// - and, condition flags are not alive in successors of the CmpInstr parent
1442 /// - and, if MI opcode is the S form there must be no defs of flags between
1443 /// MI and CmpInstr
1444 /// or if MI opcode is not the S form there must be neither defs of flags
1445 /// nor uses of flags between MI and CmpInstr.
1446 /// - and C/V flags are not used after CmpInstr
1447 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1448  const TargetRegisterInfo *TRI) {
1449  assert(MI);
1450  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1451  assert(CmpInstr);
1452 
1453  const unsigned CmpOpcode = CmpInstr->getOpcode();
1454  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1455  return false;
1456 
1457  if (MI->getParent() != CmpInstr->getParent())
1458  return false;
1459 
1460  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1461  return false;
1462 
1463  AccessKind AccessToCheck = AK_Write;
1464  if (sForm(*MI) != MI->getOpcode())
1465  AccessToCheck = AK_All;
1466  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1467  return false;
1468 
1469  UsedNZCV NZCVUsedAfterCmp;
1470  for (auto I = std::next(CmpInstr->getIterator()),
1471  E = CmpInstr->getParent()->instr_end();
1472  I != E; ++I) {
1473  const MachineInstr &Instr = *I;
1474  if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1475  AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1476  if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1477  return false;
1478  NZCVUsedAfterCmp |= getUsedNZCV(CC);
1479  }
1480 
1481  if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1482  break;
1483  }
1484 
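 // Only N and Z are guaranteed to match between the original instruction and
 // an explicit compare against zero, so give up if a later user needs C or V.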
1485  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1486 }
1487 
1488 /// Substitute an instruction comparing to zero with another instruction
1489 /// which produces needed condition flags.
1490 ///
1491 /// Return true on success.
1492 bool AArch64InstrInfo::substituteCmpToZero(
1493  MachineInstr &CmpInstr, unsigned SrcReg,
1494  const MachineRegisterInfo *MRI) const {
1495  assert(MRI);
1496  // Get the unique definition of SrcReg.
1497  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1498  if (!MI)
1499  return false;
1500 
1501  const TargetRegisterInfo *TRI = &getRegisterInfo();
1502 
1503  unsigned NewOpc = sForm(*MI);
1504  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1505  return false;
1506 
1507  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1508  return false;
1509 
1510  // Update the instruction to set NZCV.
1511  MI->setDesc(get(NewOpc));
1512  CmpInstr.eraseFromParent();
1513  bool succeeded = UpdateOperandRegClass(*MI);
1514  (void)succeeded;
1515  assert(succeeded && "Some operands reg class are incompatible!");
1516  MI->addRegisterDefined(AArch64::NZCV, TRI);
1517  return true;
1518 }
1519 
1520 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1521  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
1522  return false;
1523 
1524  MachineBasicBlock &MBB = *MI.getParent();
1525  DebugLoc DL = MI.getDebugLoc();
1526  unsigned Reg = MI.getOperand(0).getReg();
1527  const GlobalValue *GV =
1528  cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1529  const TargetMachine &TM = MBB.getParent()->getTarget();
1530  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1531  const unsigned char MO_NC = AArch64II::MO_NC;
1532 
1533  if ((OpFlags & AArch64II::MO_GOT) != 0) {
1534  BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1535  .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
1536  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1537  .addReg(Reg, RegState::Kill)
1538  .addImm(0)
1539  .addMemOperand(*MI.memoperands_begin());
1540  } else if (TM.getCodeModel() == CodeModel::Large) {
1541  BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1542  .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1543  .addImm(0);
1544  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1545  .addReg(Reg, RegState::Kill)
1546  .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1547  .addImm(16);
1548  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1549  .addReg(Reg, RegState::Kill)
1550  .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1551  .addImm(32);
1552  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1553  .addReg(Reg, RegState::Kill)
1554  .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1555  .addImm(48);
1556  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1557  .addReg(Reg, RegState::Kill)
1558  .addImm(0)
1559  .addMemOperand(*MI.memoperands_begin());
1560  } else {
1561  BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1562  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1563  unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1564  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1565  .addReg(Reg, RegState::Kill)
1566  .addGlobalAddress(GV, 0, LoFlags)
1567  .addMemOperand(*MI.memoperands_begin());
1568  }
1569 
1570  MBB.erase(MI);
1571 
1572  return true;
1573 }
1574 
1575 /// Return true if this instruction has a non-zero immediate.
1576 bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
1577  switch (MI.getOpcode()) {
1578  default:
1579  break;
1580  case AArch64::ADDSWrs:
1581  case AArch64::ADDSXrs:
1582  case AArch64::ADDWrs:
1583  case AArch64::ADDXrs:
1584  case AArch64::ANDSWrs:
1585  case AArch64::ANDSXrs:
1586  case AArch64::ANDWrs:
1587  case AArch64::ANDXrs:
1588  case AArch64::BICSWrs:
1589  case AArch64::BICSXrs:
1590  case AArch64::BICWrs:
1591  case AArch64::BICXrs:
1592  case AArch64::EONWrs:
1593  case AArch64::EONXrs:
1594  case AArch64::EORWrs:
1595  case AArch64::EORXrs:
1596  case AArch64::ORNWrs:
1597  case AArch64::ORNXrs:
1598  case AArch64::ORRWrs:
1599  case AArch64::ORRXrs:
1600  case AArch64::SUBSWrs:
1601  case AArch64::SUBSXrs:
1602  case AArch64::SUBWrs:
1603  case AArch64::SUBXrs:
1604  if (MI.getOperand(3).isImm()) {
1605  unsigned val = MI.getOperand(3).getImm();
1606  return (val != 0);
1607  }
1608  break;
1609  }
1610  return false;
1611 }
1612 
1613 /// Return true if this instruction has a non-zero immediate.
1614 bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
1615  switch (MI.getOpcode()) {
1616  default:
1617  break;
1618  case AArch64::ADDSWrx:
1619  case AArch64::ADDSXrx:
1620  case AArch64::ADDSXrx64:
1621  case AArch64::ADDWrx:
1622  case AArch64::ADDXrx:
1623  case AArch64::ADDXrx64:
1624  case AArch64::SUBSWrx:
1625  case AArch64::SUBSXrx:
1626  case AArch64::SUBSXrx64:
1627  case AArch64::SUBWrx:
1628  case AArch64::SUBXrx:
1629  case AArch64::SUBXrx64:
1630  if (MI.getOperand(3).isImm()) {
1631  unsigned val = MI.getOperand(3).getImm();
1632  return (val != 0);
1633  }
1634  break;
1635  }
1636 
1637  return false;
1638 }
1639 
1640 // Return true if this instruction simply sets its single destination register
1641 // to zero. This is equivalent to a register rename of the zero-register.
1642 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
1643  switch (MI.getOpcode()) {
1644  default:
1645  break;
1646  case AArch64::MOVZWi:
1647  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1648  if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1649  assert(MI.getDesc().getNumOperands() == 3 &&
1650  MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1651  return true;
1652  }
1653  break;
1654  case AArch64::ANDWri: // and Rd, Rzr, #imm
1655  return MI.getOperand(1).getReg() == AArch64::WZR;
1656  case AArch64::ANDXri:
1657  return MI.getOperand(1).getReg() == AArch64::XZR;
1658  case TargetOpcode::COPY:
1659  return MI.getOperand(1).getReg() == AArch64::WZR;
1660  }
1661  return false;
1662 }
1663 
1664 // Return true if this instruction simply renames a general register without
1665 // modifying bits.
1666 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
1667  switch (MI.getOpcode()) {
1668  default:
1669  break;
1670  case TargetOpcode::COPY: {
1671  // GPR32 copies will be lowered to ORRXrs
1672  unsigned DstReg = MI.getOperand(0).getReg();
1673  return (AArch64::GPR32RegClass.contains(DstReg) ||
1674  AArch64::GPR64RegClass.contains(DstReg));
1675  }
1676  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1677  if (MI.getOperand(1).getReg() == AArch64::XZR) {
1678  assert(MI.getDesc().getNumOperands() == 4 &&
1679  MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1680  return true;
1681  }
1682  break;
1683  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1684  if (MI.getOperand(2).getImm() == 0) {
1685  assert(MI.getDesc().getNumOperands() == 4 &&
1686  MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1687  return true;
1688  }
1689  break;
1690  }
1691  return false;
1692 }
1693 
1694 // Return true if this instruction simply renames a floating-point register
1695 // without modifying bits.
1696 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
1697  switch (MI.getOpcode()) {
1698  default:
1699  break;
1700  case TargetOpcode::COPY: {
1701  // FPR64 copies will be lowered to ORR.16b
1702  unsigned DstReg = MI.getOperand(0).getReg();
1703  return (AArch64::FPR64RegClass.contains(DstReg) ||
1704  AArch64::FPR128RegClass.contains(DstReg));
1705  }
1706  case AArch64::ORRv16i8:
1707  if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1708  assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1709  "invalid ORRv16i8 operands");
1710  return true;
1711  }
1712  break;
1713  }
1714  return false;
1715 }
1716 
1717 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1718  int &FrameIndex) const {
1719  switch (MI.getOpcode()) {
1720  default:
1721  break;
1722  case AArch64::LDRWui:
1723  case AArch64::LDRXui:
1724  case AArch64::LDRBui:
1725  case AArch64::LDRHui:
1726  case AArch64::LDRSui:
1727  case AArch64::LDRDui:
1728  case AArch64::LDRQui:
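 // Only the plain frame-index + zero-immediate form is reported; any other
 // offset or subregister use is not treated as a simple stack-slot access.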
1729  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1730  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1731  FrameIndex = MI.getOperand(1).getIndex();
1732  return MI.getOperand(0).getReg();
1733  }
1734  break;
1735  }
1736 
1737  return 0;
1738 }
1739 
1740 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1741  int &FrameIndex) const {
1742  switch (MI.getOpcode()) {
1743  default:
1744  break;
1745  case AArch64::STRWui:
1746  case AArch64::STRXui:
1747  case AArch64::STRBui:
1748  case AArch64::STRHui:
1749  case AArch64::STRSui:
1750  case AArch64::STRDui:
1751  case AArch64::STRQui:
1752  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1753  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1754  FrameIndex = MI.getOperand(1).getIndex();
1755  return MI.getOperand(0).getReg();
1756  }
1757  break;
1758  }
1759  return 0;
1760 }
1761 
1762 /// Return true if this load/store scales or extends its register offset.
1763 /// This refers to scaling a dynamic index as opposed to scaled immediates.
1764 /// MI should be a memory op that allows scaled addressing.
1765 bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
1766  switch (MI.getOpcode()) {
1767  default:
1768  break;
1769  case AArch64::LDRBBroW:
1770  case AArch64::LDRBroW:
1771  case AArch64::LDRDroW:
1772  case AArch64::LDRHHroW:
1773  case AArch64::LDRHroW:
1774  case AArch64::LDRQroW:
1775  case AArch64::LDRSBWroW:
1776  case AArch64::LDRSBXroW:
1777  case AArch64::LDRSHWroW:
1778  case AArch64::LDRSHXroW:
1779  case AArch64::LDRSWroW:
1780  case AArch64::LDRSroW:
1781  case AArch64::LDRWroW:
1782  case AArch64::LDRXroW:
1783  case AArch64::STRBBroW:
1784  case AArch64::STRBroW:
1785  case AArch64::STRDroW:
1786  case AArch64::STRHHroW:
1787  case AArch64::STRHroW:
1788  case AArch64::STRQroW:
1789  case AArch64::STRSroW:
1790  case AArch64::STRWroW:
1791  case AArch64::STRXroW:
1792  case AArch64::LDRBBroX:
1793  case AArch64::LDRBroX:
1794  case AArch64::LDRDroX:
1795  case AArch64::LDRHHroX:
1796  case AArch64::LDRHroX:
1797  case AArch64::LDRQroX:
1798  case AArch64::LDRSBWroX:
1799  case AArch64::LDRSBXroX:
1800  case AArch64::LDRSHWroX:
1801  case AArch64::LDRSHXroX:
1802  case AArch64::LDRSWroX:
1803  case AArch64::LDRSroX:
1804  case AArch64::LDRWroX:
1805  case AArch64::LDRXroX:
1806  case AArch64::STRBBroX:
1807  case AArch64::STRBroX:
1808  case AArch64::STRDroX:
1809  case AArch64::STRHHroX:
1810  case AArch64::STRHroX:
1811  case AArch64::STRQroX:
1812  case AArch64::STRSroX:
1813  case AArch64::STRWroX:
1814  case AArch64::STRXroX:
1815 
1816  unsigned Val = MI.getOperand(3).getImm();
1817  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1818  return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1819  }
1820  return false;
1821 }
1822 
1823 /// Check all MachineMemOperands for a hint to suppress pairing.
1824 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
1825  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1826  return MMO->getFlags() & MOSuppressPair;
1827  });
1828 }
1829 
1830 /// Set a flag on the first MachineMemOperand to suppress pairing.
1831 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
1832  if (MI.memoperands_empty())
1833  return;
1834  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1835 }
1836 
1837 /// Check all MachineMemOperands for a hint that the load/store is strided.
1838 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const {
1839  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1840  return MMO->getFlags() & MOStridedAccess;
1841  });
1842 }
1843 
1844 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
1845  switch (Opc) {
1846  default:
1847  return false;
1848  case AArch64::STURSi:
1849  case AArch64::STURDi:
1850  case AArch64::STURQi:
1851  case AArch64::STURBBi:
1852  case AArch64::STURHHi:
1853  case AArch64::STURWi:
1854  case AArch64::STURXi:
1855  case AArch64::LDURSi:
1856  case AArch64::LDURDi:
1857  case AArch64::LDURQi:
1858  case AArch64::LDURWi:
1859  case AArch64::LDURXi:
1860  case AArch64::LDURSWi:
1861  case AArch64::LDURHHi:
1862  case AArch64::LDURBBi:
1863  case AArch64::LDURSBWi:
1864  case AArch64::LDURSHWi:
1865  return true;
1866  }
1867 }
1868 
1869 bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
1870  return isUnscaledLdSt(MI.getOpcode());
1871 }
1872 
1873 // Is this a candidate for ld/st merging or pairing? For example, we don't
1874 // touch volatiles or load/stores that have a hint to avoid pair formation.
1875 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
1876  // If this is a volatile load/store, don't mess with it.
1877  if (MI.hasOrderedMemoryRef())
1878  return false;
1879 
1880  // Make sure this is a reg+imm (as opposed to an address reloc).
1881  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
1882  if (!MI.getOperand(2).isImm())
1883  return false;
1884 
1885  // Can't merge/pair if the instruction modifies the base register.
1886  // e.g., ldr x0, [x0]
1887  unsigned BaseReg = MI.getOperand(1).getReg();
1888  const TargetRegisterInfo *TRI = &getRegisterInfo();
1889  if (MI.modifiesRegister(BaseReg, TRI))
1890  return false;
1891 
1892  // Check if this load/store has a hint to avoid pair formation.
1893  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1894  if (isLdStPairSuppressed(MI))
1895  return false;
1896 
1897  // On some CPUs quad load/store pairs are slower than two single load/stores.
1898  if (Subtarget.isPaired128Slow()) {
1899  switch (MI.getOpcode()) {
1900  default:
1901  break;
1902  case AArch64::LDURQi:
1903  case AArch64::STURQi:
1904  case AArch64::LDRQui:
1905  case AArch64::STRQui:
1906  return false;
1907  }
1908  }
1909 
1910  return true;
1911 }
1912 
1913 bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
1914  MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
1915  const TargetRegisterInfo *TRI) const {
1916  unsigned Width;
1917  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
1918 }
1919 
1920 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
1921  MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
1922  const TargetRegisterInfo *TRI) const {
1923  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1924  // Handle only loads/stores with base register followed by immediate offset.
1925  if (LdSt.getNumExplicitOperands() == 3) {
1926  // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1927  if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
1928  return false;
1929  } else if (LdSt.getNumExplicitOperands() == 4) {
1930  // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1931  if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
1932  !LdSt.getOperand(3).isImm())
1933  return false;
1934  } else
1935  return false;
1936 
1937  // Get the scaling factor for the instruction and set the width for the
1938  // instruction.
1939  unsigned Scale = 0;
1940  int64_t Dummy1, Dummy2;
1941 
1942  // If this returns false, then it's an instruction we don't want to handle.
1943  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
1944  return false;
1945 
1946  // Compute the offset. Offset is calculated as the immediate operand
1947  // multiplied by the scaling factor. Unscaled instructions have scaling factor
1948  // set to 1.
1949  if (LdSt.getNumExplicitOperands() == 3) {
1950  BaseReg = LdSt.getOperand(1).getReg();
1951  Offset = LdSt.getOperand(2).getImm() * Scale;
1952  } else {
1953  assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1954  BaseReg = LdSt.getOperand(2).getReg();
1955  Offset = LdSt.getOperand(3).getImm() * Scale;
1956  }
1957  return true;
1958 }
1959 
1960 MachineOperand &AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(
1961  MachineInstr &LdSt) const {
1962  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1963  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
1964  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
1965  return OfsOp;
1966 }
1967 
1968 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
1969  unsigned &Width, int64_t &MinOffset,
1970  int64_t &MaxOffset) const {
1971  switch (Opcode) {
1972  // Not a memory operation or something we want to handle.
1973  default:
1974  Scale = Width = 0;
1975  MinOffset = MaxOffset = 0;
1976  return false;
1977  case AArch64::STRWpost:
1978  case AArch64::LDRWpost:
1979  Width = 32;
1980  Scale = 4;
1981  MinOffset = -256;
1982  MaxOffset = 255;
1983  break;
1984  case AArch64::LDURQi:
1985  case AArch64::STURQi:
1986  Width = 16;
1987  Scale = 1;
1988  MinOffset = -256;
1989  MaxOffset = 255;
1990  break;
1991  case AArch64::LDURXi:
1992  case AArch64::LDURDi:
1993  case AArch64::STURXi:
1994  case AArch64::STURDi:
1995  Width = 8;
1996  Scale = 1;
1997  MinOffset = -256;
1998  MaxOffset = 255;
1999  break;
2000  case AArch64::LDURWi:
2001  case AArch64::LDURSi:
2002  case AArch64::LDURSWi:
2003  case AArch64::STURWi:
2004  case AArch64::STURSi:
2005  Width = 4;
2006  Scale = 1;
2007  MinOffset = -256;
2008  MaxOffset = 255;
2009  break;
2010  case AArch64::LDURHi:
2011  case AArch64::LDURHHi:
2012  case AArch64::LDURSHXi:
2013  case AArch64::LDURSHWi:
2014  case AArch64::STURHi:
2015  case AArch64::STURHHi:
2016  Width = 2;
2017  Scale = 1;
2018  MinOffset = -256;
2019  MaxOffset = 255;
2020  break;
2021  case AArch64::LDURBi:
2022  case AArch64::LDURBBi:
2023  case AArch64::LDURSBXi:
2024  case AArch64::LDURSBWi:
2025  case AArch64::STURBi:
2026  case AArch64::STURBBi:
2027  Width = 1;
2028  Scale = 1;
2029  MinOffset = -256;
2030  MaxOffset = 255;
2031  break;
2032  case AArch64::LDPQi:
2033  case AArch64::LDNPQi:
2034  case AArch64::STPQi:
2035  case AArch64::STNPQi:
2036  Scale = 16;
2037  Width = 32;
2038  MinOffset = -64;
2039  MaxOffset = 63;
2040  break;
2041  case AArch64::LDRQui:
2042  case AArch64::STRQui:
2043  Scale = Width = 16;
2044  MinOffset = 0;
2045  MaxOffset = 4095;
2046  break;
2047  case AArch64::LDPXi:
2048  case AArch64::LDPDi:
2049  case AArch64::LDNPXi:
2050  case AArch64::LDNPDi:
2051  case AArch64::STPXi:
2052  case AArch64::STPDi:
2053  case AArch64::STNPXi:
2054  case AArch64::STNPDi:
2055  Scale = 8;
2056  Width = 16;
2057  MinOffset = -64;
2058  MaxOffset = 63;
2059  break;
2060  case AArch64::LDRXui:
2061  case AArch64::LDRDui:
2062  case AArch64::STRXui:
2063  case AArch64::STRDui:
2064  Scale = Width = 8;
2065  MinOffset = 0;
2066  MaxOffset = 4095;
2067  break;
2068  case AArch64::LDPWi:
2069  case AArch64::LDPSi:
2070  case AArch64::LDNPWi:
2071  case AArch64::LDNPSi:
2072  case AArch64::STPWi:
2073  case AArch64::STPSi:
2074  case AArch64::STNPWi:
2075  case AArch64::STNPSi:
2076  Scale = 4;
2077  Width = 8;
2078  MinOffset = -64;
2079  MaxOffset = 63;
2080  break;
2081  case AArch64::LDRWui:
2082  case AArch64::LDRSui:
2083  case AArch64::LDRSWui:
2084  case AArch64::STRWui:
2085  case AArch64::STRSui:
2086  Scale = Width = 4;
2087  MinOffset = 0;
2088  MaxOffset = 4095;
2089  break;
2090  case AArch64::LDRHui:
2091  case AArch64::LDRHHui:
2092  case AArch64::STRHui:
2093  case AArch64::STRHHui:
2094  Scale = Width = 2;
2095  MinOffset = 0;
2096  MaxOffset = 4095;
2097  break;
2098  case AArch64::LDRBui:
2099  case AArch64::LDRBBui:
2100  case AArch64::STRBui:
2101  case AArch64::STRBBui:
2102  Scale = Width = 1;
2103  MinOffset = 0;
2104  MaxOffset = 4095;
2105  break;
2106  }
2107 
2108  return true;
2109 }
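// Illustrative sketch (not part of the original file; the helper name is
// hypothetical): the Scale/Width/MinOffset/MaxOffset values reported above
// translate into a legal *byte* offset range of [MinOffset*Scale,
// MaxOffset*Scale] in steps of Scale. For LDRXui (Scale 8, imm 0..4095) that
// is bytes 0..32760.
static bool isLegalByteOffsetForOpc(int64_t ByteOffset, unsigned Scale,
                                    int64_t MinImm, int64_t MaxImm) {
  if (ByteOffset % Scale != 0)
    return false;                   // must be a multiple of the scale
  int64_t Imm = ByteOffset / Scale; // the immediate that would be encoded
  return Imm >= MinImm && Imm <= MaxImm;
}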
2110 
2111 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
2112 // scaled.
2113 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2114  unsigned OffsetStride = 1;
2115  switch (Opc) {
2116  default:
2117  return false;
2118  case AArch64::LDURQi:
2119  case AArch64::STURQi:
2120  OffsetStride = 16;
2121  break;
2122  case AArch64::LDURXi:
2123  case AArch64::LDURDi:
2124  case AArch64::STURXi:
2125  case AArch64::STURDi:
2126  OffsetStride = 8;
2127  break;
2128  case AArch64::LDURWi:
2129  case AArch64::LDURSi:
2130  case AArch64::LDURSWi:
2131  case AArch64::STURWi:
2132  case AArch64::STURSi:
2133  OffsetStride = 4;
2134  break;
2135  }
2136  // If the byte-offset isn't a multiple of the stride, we can't scale this
2137  // offset.
2138  if (Offset % OffsetStride != 0)
2139  return false;
2140 
2141  // Convert the byte-offset used by unscaled into an "element" offset used
2142  // by the scaled pair load/store instructions.
2143  Offset /= OffsetStride;
2144  return true;
2145 }
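// Usage illustration (not part of the original file): for an LDURXi whose byte
// offset is 16, OffsetStride is 8, so scaleOffset() rewrites the offset to the
// element offset 16 / 8 == 2, which is what the LDP/STP encodings expect; a
// byte offset of 12 is not a multiple of the stride, so the function returns
// false and no pair is formed.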
2146 
2147 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2148  if (FirstOpc == SecondOpc)
2149  return true;
2150  // We can also pair sign-ext and zero-ext instructions.
2151  switch (FirstOpc) {
2152  default:
2153  return false;
2154  case AArch64::LDRWui:
2155  case AArch64::LDURWi:
2156  return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2157  case AArch64::LDRSWui:
2158  case AArch64::LDURSWi:
2159  return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2160  }
2161  // These instructions can't be paired based on their opcodes.
2162  return false;
2163 }
2164 
2165 /// Detect opportunities for ldp/stp formation.
2166 ///
2167 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
2168 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
2169  unsigned BaseReg1,
2170  MachineInstr &SecondLdSt,
2171  unsigned BaseReg2,
2172  unsigned NumLoads) const {
2173  if (BaseReg1 != BaseReg2)
2174  return false;
2175 
2176  // Only cluster up to a single pair.
2177  if (NumLoads > 1)
2178  return false;
2179 
2180  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2181  return false;
2182 
2183  // Can we pair these instructions based on their opcodes?
2184  unsigned FirstOpc = FirstLdSt.getOpcode();
2185  unsigned SecondOpc = SecondLdSt.getOpcode();
2186  if (!canPairLdStOpc(FirstOpc, SecondOpc))
2187  return false;
2188 
2189  // Can't merge volatiles or load/stores that have a hint to avoid pair
2190  // formation, for example.
2191  if (!isCandidateToMergeOrPair(FirstLdSt) ||
2192  !isCandidateToMergeOrPair(SecondLdSt))
2193  return false;
2194 
2195  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2196  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2197  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2198  return false;
2199 
2200  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2201  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2202  return false;
2203 
2204  // Pairwise instructions have a 7-bit signed offset field.
2205  if (Offset1 > 63 || Offset1 < -64)
2206  return false;
2207 
2208  // The caller should already have ordered First/SecondLdSt by offset.
2209  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2210  return Offset1 + 1 == Offset2;
2211 }
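// Worked example (illustration only, not part of the original file): for
//   ldr x1, [x0, #8]      ; LDRXui, element offset 1
//   ldr x2, [x0, #16]     ; LDRXui, element offset 2
// the base registers match, both offsets fit the signed 7-bit LDP offset field
// ([-64, 63]), and Offset1 + 1 == Offset2, so the two loads are clustered and
// can later be rewritten as "ldp x1, x2, [x0, #8]".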
2212 
2213 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2214  unsigned Reg, unsigned SubIdx,
2215  unsigned State,
2216  const TargetRegisterInfo *TRI) {
2217  if (!SubIdx)
2218  return MIB.addReg(Reg, State);
2219 
2220  if (TargetRegisterInfo::isPhysicalRegister(Reg))
2221  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2222  return MIB.addReg(Reg, State, SubIdx);
2223 }
2224 
2225 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2226  unsigned NumRegs) {
2227  // We really want the positive remainder mod 32 here, which happens to be
2228  // easily obtainable with a mask.
2229  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2230 }
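// Worked example (illustration only, not part of the original file): copying
// the D-register triple {D0,D1,D2} into {D1,D2,D3} gives
// ((1 - 0) & 0x1f) == 1 < 3, so a forward (dsub0-first) copy would overwrite
// D1 and D2 before they are read, and copyPhysRegTuple below must iterate
// backwards; for a destination of {D5,D6,D7} the result is 5 >= 3 and the
// forward order is safe.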
2231 
2232 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2233  MachineBasicBlock::iterator I,
2234  const DebugLoc &DL, unsigned DestReg,
2235  unsigned SrcReg, bool KillSrc,
2236  unsigned Opcode,
2237  ArrayRef<unsigned> Indices) const {
2238  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2239  const TargetRegisterInfo *TRI = &getRegisterInfo();
2240  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2241  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2242  unsigned NumRegs = Indices.size();
2243 
2244  int SubReg = 0, End = NumRegs, Incr = 1;
2245  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2246  SubReg = NumRegs - 1;
2247  End = -1;
2248  Incr = -1;
2249  }
2250 
2251  for (; SubReg != End; SubReg += Incr) {
2252  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2253  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2254  AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2255  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2256  }
2257 }
2258 
2259 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2260  MachineBasicBlock::iterator I,
2261  const DebugLoc &DL, unsigned DestReg,
2262  unsigned SrcReg, bool KillSrc) const {
2263  if (AArch64::GPR32spRegClass.contains(DestReg) &&
2264  (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2265  const TargetRegisterInfo *TRI = &getRegisterInfo();
2266 
2267  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2268  // If either operand is WSP, expand to ADD #0.
2269  if (Subtarget.hasZeroCycleRegMove()) {
2270  // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2271  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2272  &AArch64::GPR64spRegClass);
2273  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2274  &AArch64::GPR64spRegClass);
2275  // This instruction is reading and writing X registers. This may upset
2276  // the register scavenger and machine verifier, so we need to indicate
2277  // that we are reading an undefined value from SrcRegX, but a proper
2278  // value from SrcReg.
2279  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2280  .addReg(SrcRegX, RegState::Undef)
2281  .addImm(0)
2282  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2283  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2284  } else {
2285  BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2286  .addReg(SrcReg, getKillRegState(KillSrc))
2287  .addImm(0)
2288  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2289  }
2290  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
2291  BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2292  .addImm(0)
2293  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2294  } else {
2295  if (Subtarget.hasZeroCycleRegMove()) {
2296  // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2297  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2298  &AArch64::GPR64spRegClass);
2299  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2300  &AArch64::GPR64spRegClass);
2301  // This instruction is reading and writing X registers. This may upset
2302  // the register scavenger and machine verifier, so we need to indicate
2303  // that we are reading an undefined value from SrcRegX, but a proper
2304  // value from SrcReg.
2305  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2306  .addReg(AArch64::XZR)
2307  .addReg(SrcRegX, RegState::Undef)
2308  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2309  } else {
2310  // Otherwise, expand to ORR WZR.
2311  BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2312  .addReg(AArch64::WZR)
2313  .addReg(SrcReg, getKillRegState(KillSrc));
2314  }
2315  }
2316  return;
2317  }
2318 
2319  if (AArch64::GPR64spRegClass.contains(DestReg) &&
2320  (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2321  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2322  // If either operand is SP, expand to ADD #0.
2323  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2324  .addReg(SrcReg, getKillRegState(KillSrc))
2325  .addImm(0)
2326  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2327  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
2328  BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2329  .addImm(0)
2330  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2331  } else {
2332  // Otherwise, expand to ORR XZR.
2333  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2334  .addReg(AArch64::XZR)
2335  .addReg(SrcReg, getKillRegState(KillSrc));
2336  }
2337  return;
2338  }
2339 
2340  // Copy a DDDD register quad by copying the individual sub-registers.
2341  if (AArch64::DDDDRegClass.contains(DestReg) &&
2342  AArch64::DDDDRegClass.contains(SrcReg)) {
2343  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2344  AArch64::dsub2, AArch64::dsub3};
2345  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2346  Indices);
2347  return;
2348  }
2349 
2350  // Copy a DDD register triple by copying the individual sub-registers.
2351  if (AArch64::DDDRegClass.contains(DestReg) &&
2352  AArch64::DDDRegClass.contains(SrcReg)) {
2353  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2354  AArch64::dsub2};
2355  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2356  Indices);
2357  return;
2358  }
2359 
2360  // Copy a DD register pair by copying the individual sub-registers.
2361  if (AArch64::DDRegClass.contains(DestReg) &&
2362  AArch64::DDRegClass.contains(SrcReg)) {
2363  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2364  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2365  Indices);
2366  return;
2367  }
2368 
2369  // Copy a QQQQ register quad by copying the individual sub-registers.
2370  if (AArch64::QQQQRegClass.contains(DestReg) &&
2371  AArch64::QQQQRegClass.contains(SrcReg)) {
2372  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2373  AArch64::qsub2, AArch64::qsub3};
2374  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2375  Indices);
2376  return;
2377  }
2378 
2379  // Copy a QQQ register triple by copying the individual sub-registers.
2380  if (AArch64::QQQRegClass.contains(DestReg) &&
2381  AArch64::QQQRegClass.contains(SrcReg)) {
2382  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2383  AArch64::qsub2};
2384  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2385  Indices);
2386  return;
2387  }
2388 
2389  // Copy a QQ register pair by copying the individual sub-registers.
2390  if (AArch64::QQRegClass.contains(DestReg) &&
2391  AArch64::QQRegClass.contains(SrcReg)) {
2392  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2393  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2394  Indices);
2395  return;
2396  }
2397 
2398  if (AArch64::FPR128RegClass.contains(DestReg) &&
2399  AArch64::FPR128RegClass.contains(SrcReg)) {
2400  if (Subtarget.hasNEON()) {
2401  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2402  .addReg(SrcReg)
2403  .addReg(SrcReg, getKillRegState(KillSrc));
2404  } else {
2405  BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2406  .addReg(AArch64::SP, RegState::Define)
2407  .addReg(SrcReg, getKillRegState(KillSrc))
2408  .addReg(AArch64::SP)
2409  .addImm(-16);
2410  BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2411  .addReg(AArch64::SP, RegState::Define)
2412  .addReg(DestReg, RegState::Define)
2413  .addReg(AArch64::SP)
2414  .addImm(16);
2415  }
2416  return;
2417  }
2418 
2419  if (AArch64::FPR64RegClass.contains(DestReg) &&
2420  AArch64::FPR64RegClass.contains(SrcReg)) {
2421  if (Subtarget.hasNEON()) {
2422  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2423  &AArch64::FPR128RegClass);
2424  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2425  &AArch64::FPR128RegClass);
2426  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2427  .addReg(SrcReg)
2428  .addReg(SrcReg, getKillRegState(KillSrc));
2429  } else {
2430  BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2431  .addReg(SrcReg, getKillRegState(KillSrc));
2432  }
2433  return;
2434  }
2435 
2436  if (AArch64::FPR32RegClass.contains(DestReg) &&
2437  AArch64::FPR32RegClass.contains(SrcReg)) {
2438  if (Subtarget.hasNEON()) {
2439  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2440  &AArch64::FPR128RegClass);
2441  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2442  &AArch64::FPR128RegClass);
2443  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2444  .addReg(SrcReg)
2445  .addReg(SrcReg, getKillRegState(KillSrc));
2446  } else {
2447  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2448  .addReg(SrcReg, getKillRegState(KillSrc));
2449  }
2450  return;
2451  }
2452 
2453  if (AArch64::FPR16RegClass.contains(DestReg) &&
2454  AArch64::FPR16RegClass.contains(SrcReg)) {
2455  if (Subtarget.hasNEON()) {
2456  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2457  &AArch64::FPR128RegClass);
2458  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2459  &AArch64::FPR128RegClass);
2460  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2461  .addReg(SrcReg)
2462  .addReg(SrcReg, getKillRegState(KillSrc));
2463  } else {
2464  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2465  &AArch64::FPR32RegClass);
2466  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2467  &AArch64::FPR32RegClass);
2468  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2469  .addReg(SrcReg, getKillRegState(KillSrc));
2470  }
2471  return;
2472  }
2473 
2474  if (AArch64::FPR8RegClass.contains(DestReg) &&
2475  AArch64::FPR8RegClass.contains(SrcReg)) {
2476  if (Subtarget.hasNEON()) {
2477  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2478  &AArch64::FPR128RegClass);
2479  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2480  &AArch64::FPR128RegClass);
2481  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2482  .addReg(SrcReg)
2483  .addReg(SrcReg, getKillRegState(KillSrc));
2484  } else {
2485  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2486  &AArch64::FPR32RegClass);
2487  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2488  &AArch64::FPR32RegClass);
2489  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2490  .addReg(SrcReg, getKillRegState(KillSrc));
2491  }
2492  return;
2493  }
2494 
2495  // Copies between GPR64 and FPR64.
2496  if (AArch64::FPR64RegClass.contains(DestReg) &&
2497  AArch64::GPR64RegClass.contains(SrcReg)) {
2498  BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2499  .addReg(SrcReg, getKillRegState(KillSrc));
2500  return;
2501  }
2502  if (AArch64::GPR64RegClass.contains(DestReg) &&
2503  AArch64::FPR64RegClass.contains(SrcReg)) {
2504  BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2505  .addReg(SrcReg, getKillRegState(KillSrc));
2506  return;
2507  }
2508  // Copies between GPR32 and FPR32.
2509  if (AArch64::FPR32RegClass.contains(DestReg) &&
2510  AArch64::GPR32RegClass.contains(SrcReg)) {
2511  BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2512  .addReg(SrcReg, getKillRegState(KillSrc));
2513  return;
2514  }
2515  if (AArch64::GPR32RegClass.contains(DestReg) &&
2516  AArch64::FPR32RegClass.contains(SrcReg)) {
2517  BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2518  .addReg(SrcReg, getKillRegState(KillSrc));
2519  return;
2520  }
2521 
2522  if (DestReg == AArch64::NZCV) {
2523  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2524  BuildMI(MBB, I, DL, get(AArch64::MSR))
2525  .addImm(AArch64SysReg::NZCV)
2526  .addReg(SrcReg, getKillRegState(KillSrc))
2527  .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2528  return;
2529  }
2530 
2531  if (SrcReg == AArch64::NZCV) {
2532  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2533  BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2534  .addImm(AArch64SysReg::NZCV)
2535  .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2536  return;
2537  }
2538 
2539  llvm_unreachable("unimplemented reg-to-reg copy");
2540 }
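// Illustration only (not part of the original file): a COPY of w1 into w2 is
// normally expanded by the code above to "orr w2, wzr, w1"; on subtargets with
// hasZeroCycleRegMove() it becomes "orr x2, xzr, x1" so the core can treat it
// as a zero-cycle move, and copies involving wsp/sp are expanded to
// "add <dst>, <src>, #0" instead, because ORR cannot access the stack pointer.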
2541 
2542 void AArch64InstrInfo::storeRegToStackSlot(
2543  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2544  bool isKill, int FI, const TargetRegisterClass *RC,
2545  const TargetRegisterInfo *TRI) const {
2546  DebugLoc DL;
2547  if (MBBI != MBB.end())
2548  DL = MBBI->getDebugLoc();
2549  MachineFunction &MF = *MBB.getParent();
2550  MachineFrameInfo &MFI = MF.getFrameInfo();
2551  unsigned Align = MFI.getObjectAlignment(FI);
2552 
2552 
2553  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2554  MachineMemOperand *MMO = MF.getMachineMemOperand(
2555  PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2556  unsigned Opc = 0;
2557  bool Offset = true;
2558  switch (TRI->getSpillSize(*RC)) {
2559  case 1:
2560  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2561  Opc = AArch64::STRBui;
2562  break;
2563  case 2:
2564  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2565  Opc = AArch64::STRHui;
2566  break;
2567  case 4:
2568  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2569  Opc = AArch64::STRWui;
2570  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2571  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2572  else
2573  assert(SrcReg != AArch64::WSP);
2574  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2575  Opc = AArch64::STRSui;
2576  break;
2577  case 8:
2578  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2579  Opc = AArch64::STRXui;
2580  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2581  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2582  else
2583  assert(SrcReg != AArch64::SP);
2584  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2585  Opc = AArch64::STRDui;
2586  break;
2587  case 16:
2588  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2589  Opc = AArch64::STRQui;
2590  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2591  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2592  Opc = AArch64::ST1Twov1d;
2593  Offset = false;
2594  }
2595  break;
2596  case 24:
2597  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2598  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2599  Opc = AArch64::ST1Threev1d;
2600  Offset = false;
2601  }
2602  break;
2603  case 32:
2604  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2605  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2606  Opc = AArch64::ST1Fourv1d;
2607  Offset = false;
2608  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2609  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2610  Opc = AArch64::ST1Twov2d;
2611  Offset = false;
2612  }
2613  break;
2614  case 48:
2615  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2616  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2617  Opc = AArch64::ST1Threev2d;
2618  Offset = false;
2619  }
2620  break;
2621  case 64:
2622  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2623  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2624  Opc = AArch64::ST1Fourv2d;
2625  Offset = false;
2626  }
2627  break;
2628  }
2629  assert(Opc && "Unknown register class");
2630 
2631  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2632  .addReg(SrcReg, getKillRegState(isKill))
2633  .addFrameIndex(FI);
2634 
2635  if (Offset)
2636  MI.addImm(0);
2637  MI.addMemOperand(MMO);
2638 }
2639 
2640 void AArch64InstrInfo::loadRegFromStackSlot(
2641  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2642  int FI, const TargetRegisterClass *RC,
2643  const TargetRegisterInfo *TRI) const {
2644  DebugLoc DL;
2645  if (MBBI != MBB.end())
2646  DL = MBBI->getDebugLoc();
2647  MachineFunction &MF = *MBB.getParent();
2648  MachineFrameInfo &MFI = MF.getFrameInfo();
2649  unsigned Align = MFI.getObjectAlignment(FI);
2650  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2651  MachineMemOperand *MMO = MF.getMachineMemOperand(
2652  PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2653 
2654  unsigned Opc = 0;
2655  bool Offset = true;
2656  switch (TRI->getSpillSize(*RC)) {
2657  case 1:
2658  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2659  Opc = AArch64::LDRBui;
2660  break;
2661  case 2:
2662  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2663  Opc = AArch64::LDRHui;
2664  break;
2665  case 4:
2666  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2667  Opc = AArch64::LDRWui;
2668  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2669  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2670  else
2671  assert(DestReg != AArch64::WSP);
2672  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2673  Opc = AArch64::LDRSui;
2674  break;
2675  case 8:
2676  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2677  Opc = AArch64::LDRXui;
2678  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2679  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2680  else
2681  assert(DestReg != AArch64::SP);
2682  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2683  Opc = AArch64::LDRDui;
2684  break;
2685  case 16:
2686  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2687  Opc = AArch64::LDRQui;
2688  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2689  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2690  Opc = AArch64::LD1Twov1d;
2691  Offset = false;
2692  }
2693  break;
2694  case 24:
2695  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2696  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2697  Opc = AArch64::LD1Threev1d;
2698  Offset = false;
2699  }
2700  break;
2701  case 32:
2702  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2703  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2704  Opc = AArch64::LD1Fourv1d;
2705  Offset = false;
2706  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2707  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2708  Opc = AArch64::LD1Twov2d;
2709  Offset = false;
2710  }
2711  break;
2712  case 48:
2713  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2714  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2715  Opc = AArch64::LD1Threev2d;
2716  Offset = false;
2717  }
2718  break;
2719  case 64:
2720  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2721  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2722  Opc = AArch64::LD1Fourv2d;
2723  Offset = false;
2724  }
2725  break;
2726  }
2727  assert(Opc && "Unknown register class");
2728 
2729  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2730  .addReg(DestReg, getDefRegState(true))
2731  .addFrameIndex(FI);
2732  if (Offset)
2733  MI.addImm(0);
2734  MI.addMemOperand(MMO);
2735 }
2736 
2737 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2738  MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2739  unsigned DestReg, unsigned SrcReg, int Offset,
2740  const TargetInstrInfo *TII,
2741  MachineInstr::MIFlag Flag, bool SetNZCV) {
2742  if (DestReg == SrcReg && Offset == 0)
2743  return;
2744 
2745  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2746  "SP increment/decrement not 16-byte aligned");
2747 
2748  bool isSub = Offset < 0;
2749  if (isSub)
2750  Offset = -Offset;
2751 
2752  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2753  // scratch register. If DestReg is a virtual register, use it as the
2754  // scratch register; otherwise, create a new virtual register (to be
2755  // replaced by the scavenger at the end of PEI). That case can be optimized
2756  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2757  // register can be loaded with offset%8 and the add/sub can use an extending
2758  // instruction with LSL#3.
2759  // Currently the function handles any offsets but generates a poor sequence
2760  // of code.
2761  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2762 
2763  unsigned Opc;
2764  if (SetNZCV)
2765  Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2766  else
2767  Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2768  const unsigned MaxEncoding = 0xfff;
2769  const unsigned ShiftSize = 12;
2770  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2771  while (((unsigned)Offset) >= (1 << ShiftSize)) {
2772  unsigned ThisVal;
2773  if (((unsigned)Offset) > MaxEncodableValue) {
2774  ThisVal = MaxEncodableValue;
2775  } else {
2776  ThisVal = Offset & MaxEncodableValue;
2777  }
2778  assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2779  "Encoding cannot handle value that big");
2780  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2781  .addReg(SrcReg)
2782  .addImm(ThisVal >> ShiftSize)
2783  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2784  .setMIFlag(Flag);
2785 
2786  SrcReg = DestReg;
2787  Offset -= ThisVal;
2788  if (Offset == 0)
2789  return;
2790  }
2791  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2792  .addReg(SrcReg)
2793  .addImm(Offset)
2794  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2795  .setMIFlag(Flag);
2796 }
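// Illustrative sketch (not part of the original file; names are made up): the
// loop above decomposes a large positive offset into ADD/SUB immediates of at
// most 12 bits, optionally shifted left by 12 bits. The same chunking on plain
// integers:
static void splitFrameOffset(unsigned Offset,
                             void (*Emit)(unsigned Imm, unsigned Shift)) {
  const unsigned MaxEncoding = 0xfff, ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (Offset >= (1u << ShiftSize)) {
    unsigned ThisVal = Offset > MaxEncodableValue
                           ? MaxEncodableValue
                           : (Offset & MaxEncodableValue);
    Emit(ThisVal >> ShiftSize, ShiftSize); // e.g. "add xD, xS, #imm, lsl #12"
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  Emit(Offset, 0); // final "add xD, xS, #imm"
}
// For example, Offset == 0x101008 emits #0x101 (lsl #12) followed by #8.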
2797 
2798 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2799  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
2800  MachineBasicBlock::iterator InsertPt, int FrameIndex,
2801  LiveIntervals *LIS) const {
2802  // This is a bit of a hack. Consider this instruction:
2803  //
2804  // %vreg0<def> = COPY %SP; GPR64all:%vreg0
2805  //
2806  // We explicitly chose GPR64all for the virtual register so such a copy might
2807  // be eliminated by RegisterCoalescer. However, that may not be possible, and
2808  // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
2809  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2810  //
2811  // To prevent that, we are going to constrain the %vreg0 register class here.
2812  //
2813  // <rdar://problem/11522048>
2814  //
2815  if (MI.isFullCopy()) {
2816  unsigned DstReg = MI.getOperand(0).getReg();
2817  unsigned SrcReg = MI.getOperand(1).getReg();
2818  if (SrcReg == AArch64::SP &&
2819  TargetRegisterInfo::isVirtualRegister(DstReg)) {
2820  MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2821  return nullptr;
2822  }
2823  if (DstReg == AArch64::SP &&
2824  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2825  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2826  return nullptr;
2827  }
2828  }
2829 
2830  // Handle the case where a copy is being spilled or filled but the source
2831  // and destination register class don't match. For example:
2832  //
2833  // %vreg0<def> = COPY %XZR; GPR64common:%vreg0
2834  //
2835  // In this case we can still safely fold away the COPY and generate the
2836  // following spill code:
2837  //
2838  // STRXui %XZR, <fi#0>
2839  //
2840  // This also eliminates spilled cross register class COPYs (e.g. between x and
2841  // d regs) of the same size. For example:
2842  //
2843  // %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
2844  //
2845  // will be filled as
2846  //
2847  // LDRDui %vreg0, fi<#0>
2848  //
2849  // instead of
2850  //
2851  // LDRXui %vregTemp, fi<#0>
2852  // %vreg0 = FMOV %vregTemp
2853  //
2854  if (MI.isCopy() && Ops.size() == 1 &&
2855  // Make sure we're only folding the explicit COPY defs/uses.
2856  (Ops[0] == 0 || Ops[0] == 1)) {
2857  bool IsSpill = Ops[0] == 0;
2858  bool IsFill = !IsSpill;
2859  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
2860  const MachineRegisterInfo &MRI = MF.getRegInfo();
2861  MachineBasicBlock &MBB = *MI.getParent();
2862  const MachineOperand &DstMO = MI.getOperand(0);
2863  const MachineOperand &SrcMO = MI.getOperand(1);
2864  unsigned DstReg = DstMO.getReg();
2865  unsigned SrcReg = SrcMO.getReg();
2866  // This is slightly expensive to compute for physical regs since
2867  // getMinimalPhysRegClass is slow.
2868  auto getRegClass = [&](unsigned Reg) {
2869  return TargetRegisterInfo::isVirtualRegister(Reg)
2870  ? MRI.getRegClass(Reg)
2871  : TRI.getMinimalPhysRegClass(Reg);
2872  };
2873 
2874  if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
2875  assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
2876  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
2877  "Mismatched register size in non subreg COPY");
2878  if (IsSpill)
2879  storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
2880  getRegClass(SrcReg), &TRI);
2881  else
2882  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
2883  getRegClass(DstReg), &TRI);
2884  return &*--InsertPt;
2885  }
2886 
2887  // Handle cases like spilling def of:
2888  //
2889  // %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
2890  //
2891  // where the physical register source can be widened and stored to the full
2892  // virtual reg destination stack slot, in this case producing:
2893  //
2894  // STRXui %XZR, <fi#0>
2895  //
2896  if (IsSpill && DstMO.isUndef() &&
2897  TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2898  assert(SrcMO.getSubReg() == 0 &&
2899  "Unexpected subreg on physical register");
2900  const TargetRegisterClass *SpillRC;
2901  unsigned SpillSubreg;
2902  switch (DstMO.getSubReg()) {
2903  default:
2904  SpillRC = nullptr;
2905  break;
2906  case AArch64::sub_32:
2907  case AArch64::ssub:
2908  if (AArch64::GPR32RegClass.contains(SrcReg)) {
2909  SpillRC = &AArch64::GPR64RegClass;
2910  SpillSubreg = AArch64::sub_32;
2911  } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
2912  SpillRC = &AArch64::FPR64RegClass;
2913  SpillSubreg = AArch64::ssub;
2914  } else
2915  SpillRC = nullptr;
2916  break;
2917  case AArch64::dsub:
2918  if (AArch64::FPR64RegClass.contains(SrcReg)) {
2919  SpillRC = &AArch64::FPR128RegClass;
2920  SpillSubreg = AArch64::dsub;
2921  } else
2922  SpillRC = nullptr;
2923  break;
2924  }
2925 
2926  if (SpillRC)
2927  if (unsigned WidenedSrcReg =
2928  TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
2929  storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
2930  FrameIndex, SpillRC, &TRI);
2931  return &*--InsertPt;
2932  }
2933  }
2934 
2935  // Handle cases like filling use of:
2936  //
2937  // %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
2938  //
2939  // where we can load the full virtual reg source stack slot, into the subreg
2940  // destination, in this case producing:
2941  //
2942  // LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
2943  //
2944  if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
2945  const TargetRegisterClass *FillRC;
2946  switch (DstMO.getSubReg()) {
2947  default:
2948  FillRC = nullptr;
2949  break;
2950  case AArch64::sub_32:
2951  FillRC = &AArch64::GPR32RegClass;
2952  break;
2953  case AArch64::ssub:
2954  FillRC = &AArch64::FPR32RegClass;
2955  break;
2956  case AArch64::dsub:
2957  FillRC = &AArch64::FPR64RegClass;
2958  break;
2959  }
2960 
2961  if (FillRC) {
2962  assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
2963  TRI.getRegSizeInBits(*FillRC) &&
2964  "Mismatched regclass size on folded subreg COPY");
2965  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
2966  MachineInstr &LoadMI = *--InsertPt;
2967  MachineOperand &LoadDst = LoadMI.getOperand(0);
2968  assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
2969  LoadDst.setSubReg(DstMO.getSubReg());
2970  LoadDst.setIsUndef();
2971  return &LoadMI;
2972  }
2973  }
2974  }
2975 
2976  // Cannot fold.
2977  return nullptr;
2978 }
2979 
2980 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2981  bool *OutUseUnscaledOp,
2982  unsigned *OutUnscaledOp,
2983  int *EmittableOffset) {
2984  int Scale = 1;
2985  bool IsSigned = false;
2986  // ImmIdx is adjusted below, case by case, when the immediate is not operand 2.
2987  unsigned ImmIdx = 2;
2988  unsigned UnscaledOp = 0;
2989  // Set output values in case of early exit.
2990  if (EmittableOffset)
2991  *EmittableOffset = 0;
2992  if (OutUseUnscaledOp)
2993  *OutUseUnscaledOp = false;
2994  if (OutUnscaledOp)
2995  *OutUnscaledOp = 0;
2996  switch (MI.getOpcode()) {
2997  default:
2998  llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
2999  // Vector spills/fills can't take an immediate offset.
3000  case AArch64::LD1Twov2d:
3001  case AArch64::LD1Threev2d:
3002  case AArch64::LD1Fourv2d:
3003  case AArch64::LD1Twov1d:
3004  case AArch64::LD1Threev1d:
3005  case AArch64::LD1Fourv1d:
3006  case AArch64::ST1Twov2d:
3007  case AArch64::ST1Threev2d:
3008  case AArch64::ST1Fourv2d:
3009  case AArch64::ST1Twov1d:
3010  case AArch64::ST1Threev1d:
3011  case AArch64::ST1Fourv1d:
3012  return AArch64FrameOffsetCannotUpdate;
3013  case AArch64::PRFMui:
3014  Scale = 8;
3015  UnscaledOp = AArch64::PRFUMi;
3016  break;
3017  case AArch64::LDRXui:
3018  Scale = 8;
3019  UnscaledOp = AArch64::LDURXi;
3020  break;
3021  case AArch64::LDRWui:
3022  Scale = 4;
3023  UnscaledOp = AArch64::LDURWi;
3024  break;
3025  case AArch64::LDRBui:
3026  Scale = 1;
3027  UnscaledOp = AArch64::LDURBi;
3028  break;
3029  case AArch64::LDRHui:
3030  Scale = 2;
3031  UnscaledOp = AArch64::LDURHi;
3032  break;
3033  case AArch64::LDRSui:
3034  Scale = 4;
3035  UnscaledOp = AArch64::LDURSi;
3036  break;
3037  case AArch64::LDRDui:
3038  Scale = 8;
3039  UnscaledOp = AArch64::LDURDi;
3040  break;
3041  case AArch64::LDRQui:
3042  Scale = 16;
3043  UnscaledOp = AArch64::LDURQi;
3044  break;
3045  case AArch64::LDRBBui:
3046  Scale = 1;
3047  UnscaledOp = AArch64::LDURBBi;
3048  break;
3049  case AArch64::LDRHHui:
3050  Scale = 2;
3051  UnscaledOp = AArch64::LDURHHi;
3052  break;
3053  case AArch64::LDRSBXui:
3054  Scale = 1;
3055  UnscaledOp = AArch64::LDURSBXi;
3056  break;
3057  case AArch64::LDRSBWui:
3058  Scale = 1;
3059  UnscaledOp = AArch64::LDURSBWi;
3060  break;
3061  case AArch64::LDRSHXui:
3062  Scale = 2;
3063  UnscaledOp = AArch64::LDURSHXi;
3064  break;
3065  case AArch64::LDRSHWui:
3066  Scale = 2;
3067  UnscaledOp = AArch64::LDURSHWi;
3068  break;
3069  case AArch64::LDRSWui:
3070  Scale = 4;
3071  UnscaledOp = AArch64::LDURSWi;
3072  break;
3073 
3074  case AArch64::STRXui:
3075  Scale = 8;
3076  UnscaledOp = AArch64::STURXi;
3077  break;
3078  case AArch64::STRWui:
3079  Scale = 4;
3080  UnscaledOp = AArch64::STURWi;
3081  break;
3082  case AArch64::STRBui:
3083  Scale = 1;
3084  UnscaledOp = AArch64::STURBi;
3085  break;
3086  case AArch64::STRHui:
3087  Scale = 2;
3088  UnscaledOp = AArch64::STURHi;
3089  break;
3090  case AArch64::STRSui:
3091  Scale = 4;
3092  UnscaledOp = AArch64::STURSi;
3093  break;
3094  case AArch64::STRDui:
3095  Scale = 8;
3096  UnscaledOp = AArch64::STURDi;
3097  break;
3098  case AArch64::STRQui:
3099  Scale = 16;
3100  UnscaledOp = AArch64::STURQi;
3101  break;
3102  case AArch64::STRBBui:
3103  Scale = 1;
3104  UnscaledOp = AArch64::STURBBi;
3105  break;
3106  case AArch64::STRHHui:
3107  Scale = 2;
3108  UnscaledOp = AArch64::STURHHi;
3109  break;
3110 
3111  case AArch64::LDPXi:
3112  case AArch64::LDPDi:
3113  case AArch64::STPXi:
3114  case AArch64::STPDi:
3115  case AArch64::LDNPXi:
3116  case AArch64::LDNPDi:
3117  case AArch64::STNPXi:
3118  case AArch64::STNPDi:
3119  ImmIdx = 3;
3120  IsSigned = true;
3121  Scale = 8;
3122  break;
3123  case AArch64::LDPQi:
3124  case AArch64::STPQi:
3125  case AArch64::LDNPQi:
3126  case AArch64::STNPQi:
3127  ImmIdx = 3;
3128  IsSigned = true;
3129  Scale = 16;
3130  break;
3131  case AArch64::LDPWi:
3132  case AArch64::LDPSi:
3133  case AArch64::STPWi:
3134  case AArch64::STPSi:
3135  case AArch64::LDNPWi:
3136  case AArch64::LDNPSi:
3137  case AArch64::STNPWi:
3138  case AArch64::STNPSi:
3139  ImmIdx = 3;
3140  IsSigned = true;
3141  Scale = 4;
3142  break;
3143 
3144  case AArch64::LDURXi:
3145  case AArch64::LDURWi:
3146  case AArch64::LDURBi:
3147  case AArch64::LDURHi:
3148  case AArch64::LDURSi:
3149  case AArch64::LDURDi:
3150  case AArch64::LDURQi:
3151  case AArch64::LDURHHi:
3152  case AArch64::LDURBBi:
3153  case AArch64::LDURSBXi:
3154  case AArch64::LDURSBWi:
3155  case AArch64::LDURSHXi:
3156  case AArch64::LDURSHWi:
3157  case AArch64::LDURSWi:
3158  case AArch64::STURXi:
3159  case AArch64::STURWi:
3160  case AArch64::STURBi:
3161  case AArch64::STURHi:
3162  case AArch64::STURSi:
3163  case AArch64::STURDi:
3164  case AArch64::STURQi:
3165  case AArch64::STURBBi:
3166  case AArch64::STURHHi:
3167  Scale = 1;
3168  break;
3169  }
3170 
3171  Offset += MI.getOperand(ImmIdx).getImm() * Scale;
3172 
3173  bool useUnscaledOp = false;
3174  // If the offset doesn't match the scale, we rewrite the instruction to
3175  // use the unscaled instruction instead. Likewise, if we have a negative
3176  // offset (and have an unscaled op to use).
3177  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
3178  useUnscaledOp = true;
3179 
3180  // Use an unscaled addressing mode if the instruction has a negative offset
3181  // (or if the instruction is already using an unscaled addressing mode).
3182  unsigned MaskBits;
3183  if (IsSigned) {
3184  // ldp/stp instructions.
3185  MaskBits = 7;
3186  Offset /= Scale;
3187  } else if (UnscaledOp == 0 || useUnscaledOp) {
3188  MaskBits = 9;
3189  IsSigned = true;
3190  Scale = 1;
3191  } else {
3192  MaskBits = 12;
3193  IsSigned = false;
3194  Offset /= Scale;
3195  }
3196 
3197  // Attempt to fold address computation.
3198  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
3199  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
3200  if (Offset >= MinOff && Offset <= MaxOff) {
3201  if (EmittableOffset)
3202  *EmittableOffset = Offset;
3203  Offset = 0;
3204  } else {
3205  int NewOff = Offset < 0 ? MinOff : MaxOff;
3206  if (EmittableOffset)
3207  *EmittableOffset = NewOff;
3208  Offset = (Offset - NewOff) * Scale;
3209  }
3210  if (OutUseUnscaledOp)
3211  *OutUseUnscaledOp = useUnscaledOp;
3212  if (OutUnscaledOp)
3213  *OutUnscaledOp = UnscaledOp;
3214  return AArch64FrameOffsetCanUpdate |
3215  (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3216 }
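// Worked example (illustration only, not part of the original file): for an
// LDRXui (Scale 8, unsigned 12-bit immediate) a byte offset of 40 is emittable
// directly as imm 5; a byte offset of 12 is not a multiple of 8, so the
// routine above switches to the unscaled LDURXi (signed 9-bit, Scale 1) and
// reports imm 12; a byte offset of 40000 exceeds both encodings, so the
// closest emittable value is reported and the remainder must first be
// materialized into the base register.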
3217 
3218 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3219  unsigned FrameReg, int &Offset,
3220  const AArch64InstrInfo *TII) {
3221  unsigned Opcode = MI.getOpcode();
3222  unsigned ImmIdx = FrameRegIdx + 1;
3223 
3224  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3225  Offset += MI.getOperand(ImmIdx).getImm();
3226  emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3227  MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3228  MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3229  MI.eraseFromParent();
3230  Offset = 0;
3231  return true;
3232  }
3233 
3234  int NewOffset;
3235  unsigned UnscaledOp;
3236  bool UseUnscaledOp;
3237  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3238  &UnscaledOp, &NewOffset);
3239  if (Status & AArch64FrameOffsetCanUpdate) {
3240  if (Status & AArch64FrameOffsetIsLegal)
3241  // Replace the FrameIndex with FrameReg.
3242  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3243  if (UseUnscaledOp)
3244  MI.setDesc(TII->get(UnscaledOp));
3245 
3246  MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3247  return Offset == 0;
3248  }
3249 
3250  return false;
3251 }
3252 
3253 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3254  NopInst.setOpcode(AArch64::HINT);
3255  NopInst.addOperand(MCOperand::createImm(0));
3256 }
3257 
3258 // AArch64 supports MachineCombiner.
3259 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3260 
3261 // True when Opc sets the NZCV flags.
3262 static bool isCombineInstrSettingFlag(unsigned Opc) {
3263  switch (Opc) {
3264  case AArch64::ADDSWrr:
3265  case AArch64::ADDSWri:
3266  case AArch64::ADDSXrr:
3267  case AArch64::ADDSXri:
3268  case AArch64::SUBSWrr:
3269  case AArch64::SUBSXrr:
3270  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3271  case AArch64::SUBSWri:
3272  case AArch64::SUBSXri:
3273  return true;
3274  default:
3275  break;
3276  }
3277  return false;
3278 }
3279 
3280 // 32b Opcodes that can be combined with a MUL
3281 static bool isCombineInstrCandidate32(unsigned Opc) {
3282  switch (Opc) {
3283  case AArch64::ADDWrr:
3284  case AArch64::ADDWri:
3285  case AArch64::SUBWrr:
3286  case AArch64::ADDSWrr:
3287  case AArch64::ADDSWri:
3288  case AArch64::SUBSWrr:
3289  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3290  case AArch64::SUBWri:
3291  case AArch64::SUBSWri:
3292  return true;
3293  default:
3294  break;
3295  }
3296  return false;
3297 }
3298 
3299 // 64b Opcodes that can be combined with a MUL
3300 static bool isCombineInstrCandidate64(unsigned Opc) {
3301  switch (Opc) {
3302  case AArch64::ADDXrr:
3303  case AArch64::ADDXri:
3304  case AArch64::SUBXrr:
3305  case AArch64::ADDSXrr:
3306  case AArch64::ADDSXri:
3307  case AArch64::SUBSXrr:
3308  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3309  case AArch64::SUBXri:
3310  case AArch64::SUBSXri:
3311  return true;
3312  default:
3313  break;
3314  }
3315  return false;
3316 }
3317 
3318 // FP Opcodes that can be combined with a FMUL
3319 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3320  switch (Inst.getOpcode()) {
3321  default:
3322  break;
3323  case AArch64::FADDSrr:
3324  case AArch64::FADDDrr:
3325  case AArch64::FADDv2f32:
3326  case AArch64::FADDv2f64:
3327  case AArch64::FADDv4f32:
3328  case AArch64::FSUBSrr:
3329  case AArch64::FSUBDrr:
3330  case AArch64::FSUBv2f32:
3331  case AArch64::FSUBv2f64:
3332  case AArch64::FSUBv4f32:
3333  TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3334  return (Options.UnsafeFPMath ||
3335  Options.AllowFPOpFusion == FPOpFusion::Fast);
3336  }
3337  return false;
3338 }
3339 
3340 // Opcodes that can be combined with a MUL
3341 static bool isCombineInstrCandidate(unsigned Opc) {
3342  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3343 }
3344 
3345 //
3346 // Utility routine that checks if \param MO is defined by an
3347 // \param CombineOpc instruction in the basic block \param MBB
3348 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3349  unsigned CombineOpc, unsigned ZeroReg = 0,
3350  bool CheckZeroReg = false) {
3351  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3352  MachineInstr *MI = nullptr;
3353 
3354  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3355  MI = MRI.getUniqueVRegDef(MO.getReg());
3356  // And it needs to be in the trace (otherwise, it won't have a depth).
3357  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3358  return false;
3359  // The multiply must only be used by the instruction we combine with.
3360  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3361  return false;
3362 
3363  if (CheckZeroReg) {
3364  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3365  MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3366  MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
3367  // The third input reg must be zero.
3368  if (MI->getOperand(3).getReg() != ZeroReg)
3369  return false;
3370  }
3371 
3372  return true;
3373 }
3374 
3375 //
3376 // Is \param MO defined by an integer multiply and can be combined?
3377 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3378  unsigned MulOpc, unsigned ZeroReg) {
3379  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3380 }
3381 
3382 //
3383 // Is \param MO defined by a floating-point multiply and can be combined?
3384 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3385  unsigned MulOpc) {
3386  return canCombine(MBB, MO, MulOpc);
3387 }
3388 
3389 // TODO: There are many more machine instruction opcodes to match:
3390 // 1. Other data types (integer, vectors)
3391 // 2. Other math / logic operations (xor, or)
3392 // 3. Other forms of the same operation (intrinsics and other variants)
3393 bool AArch64InstrInfo::isAssociativeAndCommutative(
3394  const MachineInstr &Inst) const {
3395  switch (Inst.getOpcode()) {
3396  case AArch64::FADDDrr:
3397  case AArch64::FADDSrr:
3398  case AArch64::FADDv2f32:
3399  case AArch64::FADDv2f64:
3400  case AArch64::FADDv4f32:
3401  case AArch64::FMULDrr:
3402  case AArch64::FMULSrr:
3403  case AArch64::FMULX32:
3404  case AArch64::FMULX64:
3405  case AArch64::FMULXv2f32:
3406  case AArch64::FMULXv2f64:
3407  case AArch64::FMULXv4f32:
3408  case AArch64::FMULv2f32:
3409  case AArch64::FMULv2f64:
3410  case AArch64::FMULv4f32:
3411  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3412  default:
3413  return false;
3414  }
3415 }
3416 
3417 /// Find instructions that can be turned into madd.
3418 static bool getMaddPatterns(MachineInstr &Root,
3419  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3420  unsigned Opc = Root.getOpcode();
3421  MachineBasicBlock &MBB = *Root.getParent();
3422  bool Found = false;
3423 
3424  if (!isCombineInstrCandidate(Opc))
3425  return false;
3426  if (isCombineInstrSettingFlag(Opc)) {
3427  int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3428  // When NZCV is live bail out.
3429  if (Cmp_NZCV == -1)
3430  return false;
3431  unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3432  // When opcode can't change bail out.
3433  // CHECKME: do we miss any cases for opcode conversion?
3434  if (NewOpc == Opc)
3435  return false;
3436  Opc = NewOpc;
3437  }
3438 
3439  switch (Opc) {
3440  default:
3441  break;
3442  case AArch64::ADDWrr:
3443  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3444  "ADDWrr does not have register operands");
3445  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3446  AArch64::WZR)) {
3448  Found = true;
3449  }
3450  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3451  AArch64::WZR)) {
3453  Found = true;
3454  }
3455  break;
3456  case AArch64::ADDXrr:
3457  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3458  AArch64::XZR)) {
3460  Found = true;
3461  }
3462  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3463  AArch64::XZR)) {
3465  Found = true;
3466  }
3467  break;
3468  case AArch64::SUBWrr:
3469  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3470  AArch64::WZR)) {
3472  Found = true;
3473  }
3474  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3475  AArch64::WZR)) {
3477  Found = true;
3478  }
3479  break;
3480  case AArch64::SUBXrr:
3481  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3482  AArch64::XZR)) {
3484  Found = true;
3485  }
3486  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3487  AArch64::XZR)) {
3489  Found = true;
3490  }
3491  break;
3492  case AArch64::ADDWri:
3493  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3494  AArch64::WZR)) {
3496  Found = true;
3497  }
3498  break;
3499  case AArch64::ADDXri:
3500  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3501  AArch64::XZR)) {
3503  Found = true;
3504  }
3505  break;
3506  case AArch64::SUBWri:
3507  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3508  AArch64::WZR)) {
3510  Found = true;
3511  }
3512  break;
3513  case AArch64::SUBXri:
3514  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3515  AArch64::XZR)) {
3517  Found = true;
3518  }
3519  break;
3520  }
3521  return Found;
3522 }
3523 /// Floating-Point Support
3524 
3525 /// Find instructions that can be turned into fmadd/fmsub.
3526 static bool getFMAPatterns(MachineInstr &Root,
3527  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3528 
3529  if (!isCombineInstrCandidateFP(Root))
3530  return false;
3531 
3532  MachineBasicBlock &MBB = *Root.getParent();
3533  bool Found = false;
3534 
3535  switch (Root.getOpcode()) {
3536  default:
3537  assert(false && "Unsupported FP instruction in combiner\n");
3538  break;
3539  case AArch64::FADDSrr:
3540  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3541  "FADDWrr does not have register operands");
3542  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3544  Found = true;
3545  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3546  AArch64::FMULv1i32_indexed)) {
3548  Found = true;
3549  }
3550  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3552  Found = true;
3553  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3554  AArch64::FMULv1i32_indexed)) {
3556  Found = true;
3557  }
3558  break;
3559  case AArch64::FADDDrr:
3560  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3562  Found = true;
3563  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3564  AArch64::FMULv1i64_indexed)) {
3566  Found = true;
3567  }
3568  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3570  Found = true;
3571  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3572  AArch64::FMULv1i64_indexed)) {
3574  Found = true;
3575  }
3576  break;
3577  case AArch64::FADDv2f32:
3578  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3579  AArch64::FMULv2i32_indexed)) {
3581  Found = true;
3582  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3583  AArch64::FMULv2f32)) {
3585  Found = true;
3586  }
3587  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3588  AArch64::FMULv2i32_indexed)) {
3590  Found = true;
3591  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3592  AArch64::FMULv2f32)) {
3594  Found = true;
3595  }
3596  break;
3597  case AArch64::FADDv2f64:
3598  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3599  AArch64::FMULv2i64_indexed)) {
3601  Found = true;
3602  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3603  AArch64::FMULv2f64)) {
3605  Found = true;
3606  }
3607  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3608  AArch64::FMULv2i64_indexed)) {
3610  Found = true;
3611  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3612  AArch64::FMULv2f64)) {
3614  Found = true;
3615  }
3616  break;
3617  case AArch64::FADDv4f32:
3618  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3619  AArch64::FMULv4i32_indexed)) {
3621  Found = true;
3622  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3623  AArch64::FMULv4f32)) {
3625  Found = true;
3626  }
3627  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3628  AArch64::FMULv4i32_indexed)) {
3630  Found = true;
3631  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3632  AArch64::FMULv4f32)) {
3634  Found = true;
3635  }
3636  break;
3637 
3638  case AArch64::FSUBSrr:
3639  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3641  Found = true;
3642  }
3643  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3645  Found = true;
3646  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3647  AArch64::FMULv1i32_indexed)) {
3649  Found = true;
3650  }
3651  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3653  Found = true;
3654  }
3655  break;
3656  case AArch64::FSUBDrr:
3657  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3659  Found = true;
3660  }
3661  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3663  Found = true;
3664  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3665  AArch64::FMULv1i64_indexed)) {
3667  Found = true;
3668  }
3669  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3671  Found = true;
3672  }
3673  break;
3674  case AArch64::FSUBv2f32:
3675  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3676  AArch64::FMULv2i32_indexed)) {
3678  Found = true;
3679  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3680  AArch64::FMULv2f32)) {
3682  Found = true;
3683  }
3684  break;
3685  case AArch64::FSUBv2f64:
3686  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3687  AArch64::FMULv2i64_indexed)) {
3689  Found = true;
3690  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3691  AArch64::FMULv2f64)) {
3693  Found = true;
3694  }
3695  break;
3696  case AArch64::FSUBv4f32:
3697  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3698  AArch64::FMULv4i32_indexed)) {
3700  Found = true;
3701  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3702  AArch64::FMULv4f32)) {
3704  Found = true;
3705  }
3706  break;
3707  }
3708  return Found;
3709 }
3710 
3711 /// Return true when a code sequence can improve throughput. It
3712 /// should be called only for instructions in loops.
3713 /// \param Pattern - combiner pattern
3714 bool AArch64InstrInfo::isThroughputPattern(
3715  MachineCombinerPattern Pattern) const {
3716  switch (Pattern) {
3717  default:
3718  break;
3753  return true;
3754  } // end switch (Pattern)
3755  return false;
3756 }
3757 /// Return true when there is potentially a faster code sequence for an
3758 /// instruction chain ending in \p Root. All potential patterns are listed in
3759 /// the \p Patterns vector. Patterns should be sorted in priority order since the
3760 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3761 
3762 bool AArch64InstrInfo::getMachineCombinerPatterns(
3763  MachineInstr &Root,
3764  SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3765  // Integer patterns
3766  if (getMaddPatterns(Root, Patterns))
3767  return true;
3768  // Floating point patterns
3769  if (getFMAPatterns(Root, Patterns))
3770  return true;
3771 
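// Otherwise fall back to the target-independent patterns (e.g. reassociation).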
3772  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3773 }
3774 
3776 /// genFusedMultiply - Generate fused multiply instructions.
3777 /// This function supports both integer and floating point instructions.
3778 /// A typical example:
3779 /// F|MUL I=A,B,0
3780 /// F|ADD R,I,C
3781 /// ==> F|MADD R,A,B,C
3782 /// \param MF Containing MachineFunction
3783 /// \param MRI Register information
3784 /// \param TII Target information
3785 /// \param Root is the F|ADD instruction
3786 /// \param [out] InsInstrs is a vector of machine instructions and will
3787 /// contain the generated madd instruction
3788 /// \param IdxMulOpd is index of operand in Root that is the result of
3789 /// the F|MUL. In the example above IdxMulOpd is 1.
3790 /// \param MaddOpc the opcode of the f|madd instruction
3791 /// \param RC Register class of operands
3792 /// \param kind The kind of fma instruction (addressing mode) to be generated
3793 static MachineInstr *
3794 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3795  const TargetInstrInfo *TII, MachineInstr &Root,
3796  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3797  unsigned MaddOpc, const TargetRegisterClass *RC,
3798  FMAInstKind kind = FMAInstKind::Default) {
3799  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3800 
3801  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3802  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3803  unsigned ResultReg = Root.getOperand(0).getReg();
3804  unsigned SrcReg0 = MUL->getOperand(1).getReg();
3805  bool Src0IsKill = MUL->getOperand(1).isKill();
3806  unsigned SrcReg1 = MUL->getOperand(2).getReg();
3807  bool Src1IsKill = MUL->getOperand(2).isKill();
3808  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3809  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3810 
3811  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3812  MRI.constrainRegClass(ResultReg, RC);
3813  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3814  MRI.constrainRegClass(SrcReg0, RC);
3815  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3816  MRI.constrainRegClass(SrcReg1, RC);
3817  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3818  MRI.constrainRegClass(SrcReg2, RC);
3819 
3820  MachineInstrBuilder MIB;
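// Note on operand order (illustrative): the scalar MADD/FMADD-style opcodes
// take (Rn, Rm, Ra) with the addend last, while the vector FMLA/FMLS forms
// list the accumulator first; the indexed forms additionally carry the lane
// immediate copied over from the FMUL.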
3821  if (kind == FMAInstKind::Default)
3822  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3823  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3824  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3825  .addReg(SrcReg2, getKillRegState(Src2IsKill));
3826  else if (kind == FMAInstKind::Indexed)
3827  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3828  .addReg(SrcReg2, getKillRegState(Src2IsKill))
3829  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3830  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3831  .addImm(MUL->getOperand(3).getImm());
3832  else if (kind == FMAInstKind::Accumulator)
3833  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3834  .addReg(SrcReg2, getKillRegState(Src2IsKill))
3835  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3836  .addReg(SrcReg1, getKillRegState(Src1IsKill));
3837  else
3838  assert(false && "Invalid FMA instruction kind \n");
3839  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
3840  InsInstrs.push_back(MIB);
3841  return MUL;
3842 }
3843 
3844 /// genMaddR - Generate madd instruction and combine mul and add using
3845 /// an extra virtual register
3846 /// Example - an ADD intermediate needs to be stored in a register:
3847 /// MUL I=A,B,0
3848 /// ADD R,I,Imm
3849 /// ==> ORR V, ZR, Imm
3850 /// ==> MADD R,A,B,V
3851 /// \param MF Containing MachineFunction
3852 /// \param MRI Register information
3853 /// \param TII Target information
3854 /// \param Root is the ADD instruction
3855 /// \param [out] InsInstrs is a vector of machine instructions and will
3856 /// contain the generated madd instruction
3857 /// \param IdxMulOpd is index of operand in Root that is the result of
3858 /// the MUL. In the example above IdxMulOpd is 1.
3859 /// \param MaddOpc the opcode of the madd instruction
3860 /// \param VR is a virtual register that holds the value of an ADD operand
3861 /// (V in the example above).
3862 /// \param RC Register class of operands
3863 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
3864  const TargetInstrInfo *TII, MachineInstr &Root,
3865  SmallVectorImpl<MachineInstr *> &InsInstrs,
3866  unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
3867  const TargetRegisterClass *RC) {
3868  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3869 
3870  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3871  unsigned ResultReg = Root.getOperand(0).getReg();
3872  unsigned SrcReg0 = MUL->getOperand(1).getReg();
3873  bool Src0IsKill = MUL->getOperand(1).isKill();
3874  unsigned SrcReg1 = MUL->getOperand(2).getReg();
3875  bool Src1IsKill = MUL->getOperand(2).isKill();
3876 
3877  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3878  MRI.constrainRegClass(ResultReg, RC);
3879  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3880  MRI.constrainRegClass(SrcReg0, RC);
3881  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3882  MRI.constrainRegClass(SrcReg1, RC);
3883  if (TargetRegisterInfo::isVirtualRegister(VR))
3884  MRI.constrainRegClass(VR, RC);
3885 
3886  MachineInstrBuilder MIB =
3887  BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3888  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3889  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3890  .addReg(VR);
3891  // Insert the MADD
3892  InsInstrs.push_back(MIB);
3893  return MUL;
3894 }
3895 
3896 /// When getMachineCombinerPatterns() finds potential patterns,
3897 /// this function generates the instructions that could replace the
3898 /// original code sequence
3899 void AArch64InstrInfo::genAlternativeCodeSequence(
3900  MachineInstr &Root, MachineCombinerPattern Pattern,
3901  SmallVectorImpl<MachineInstr *> &InsInstrs,
3902  SmallVectorImpl<MachineInstr *> &DelInstrs,
3903  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
3904  MachineBasicBlock &MBB = *Root.getParent();
3905  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3906  MachineFunction &MF = *MBB.getParent();
3907  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
3908 
3909  MachineInstr *MUL;
3910  const TargetRegisterClass *RC;
3911  unsigned Opc;
3912  switch (Pattern) {
3913  default:
3914  // Reassociate instructions.
3915  TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
3916  DelInstrs, InstrIdxForVirtReg);
3917  return;
3920  // MUL I=A,B,0
3921  // ADD R,I,C
3922  // ==> MADD R,A,B,C
3923  // --- Create(MADD);
3924  if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
3925  Opc = AArch64::MADDWrrr;
3926  RC = &AArch64::GPR32RegClass;
3927  } else {
3928  Opc = AArch64::MADDXrrr;
3929  RC = &AArch64::GPR64RegClass;
3930  }
3931  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3932  break;
3935  // MUL I=A,B,0
3936  // ADD R,C,I
3937  // ==> MADD R,A,B,C
3938  // --- Create(MADD);
3939  if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
3940  Opc = AArch64::MADDWrrr;
3941  RC = &AArch64::GPR32RegClass;
3942  } else {
3943  Opc = AArch64::MADDXrrr;
3944  RC = &AArch64::GPR64RegClass;
3945  }
3946  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3947  break;
3950  // MUL I=A,B,0
3951  // ADD R,I,Imm
3952  // ==> ORR V, ZR, Imm
3953  // ==> MADD R,A,B,V
3954  // --- Create(MADD);
3955  const TargetRegisterClass *OrrRC;
3956  unsigned BitSize, OrrOpc, ZeroReg;
3957  if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
3958  OrrOpc = AArch64::ORRWri;
3959  OrrRC = &AArch64::GPR32spRegClass;
3960  BitSize = 32;
3961  ZeroReg = AArch64::WZR;
3962  Opc = AArch64::MADDWrrr;
3963  RC = &AArch64::GPR32RegClass;
3964  } else {
3965  OrrOpc = AArch64::ORRXri;
3966  OrrRC = &AArch64::GPR64spRegClass;
3967  BitSize = 64;
3968  ZeroReg = AArch64::XZR;
3969  Opc = AArch64::MADDXrrr;
3970  RC = &AArch64::GPR64RegClass;
3971  }
3972  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
3973  uint64_t Imm = Root.getOperand(2).getImm();
3974 
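// The ADD-immediate form may carry an optional 'LSL #12' on its immediate
// (operand 3); fold that shift into Imm before checking encodability.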
3975  if (Root.getOperand(3).isImm()) {
3976  unsigned Val = Root.getOperand(3).getImm();
3977  Imm = Imm << Val;
3978  }
3979  uint64_t UImm = SignExtend64(Imm, BitSize);
3980  uint64_t Encoding;
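// The transform only fires when the constant can be rematerialized as an ORR
// logical immediate. For example (register names illustrative),
//   mul w9, w1, w2 ; add w0, w9, #0xff
// becomes
//   orr w9, wzr, #0xff ; madd w0, w1, w2, w9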
3981  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
3982  MachineInstrBuilder MIB1 =
3983  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
3984  .addReg(ZeroReg)
3985  .addImm(Encoding);
3986  InsInstrs.push_back(MIB1);
3987  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3988  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3989  }
3990  break;
3991  }
3994  // MUL I=A,B,0
3995  // SUB R,I, C
3996  // ==> SUB V, 0, C
3997  // ==> MADD R,A,B,V // = -C + A*B
3998  // --- Create(MADD);
3999  const TargetRegisterClass *SubRC;
4000  unsigned SubOpc, ZeroReg;
4001  if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4002  SubOpc = AArch64::SUBWrr;
4003  SubRC = &AArch64::GPR32spRegClass;
4004  ZeroReg = AArch64::WZR;
4005  Opc = AArch64::MADDWrrr;
4006  RC = &AArch64::GPR32RegClass;
4007  } else {
4008  SubOpc = AArch64::SUBXrr;
4009  SubRC = &AArch64::GPR64spRegClass;
4010  ZeroReg = AArch64::XZR;
4011  Opc = AArch64::MADDXrrr;
4012  RC = &AArch64::GPR64RegClass;
4013  }
4014  unsigned NewVR = MRI.createVirtualRegister(SubRC);
4015  // SUB NewVR, 0, C
4016  MachineInstrBuilder MIB1 =
4017  BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4018  .addReg(ZeroReg)
4019  .add(Root.getOperand(2));
4020  InsInstrs.push_back(MIB1);
4021  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4022  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4023  break;
4024  }
4027  // MUL I=A,B,0
4028  // SUB R,C,I
4029  // ==> MSUB R,A,B,C (computes C - A*B)
4030  // --- Create(MSUB);
4031  if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4032  Opc = AArch64::MSUBWrrr;
4033  RC = &AArch64::GPR32RegClass;
4034  } else {
4035  Opc = AArch64::MSUBXrrr;
4036  RC = &AArch64::GPR64RegClass;
4037  }
4038  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4039  break;
4042  // MUL I=A,B,0
4043  // SUB R,I, Imm
4044  // ==> ORR V, ZR, -Imm
4045  // ==> MADD R,A,B,V // = -Imm + A*B
4046  // --- Create(MADD);
4047  const TargetRegisterClass *OrrRC;
4048  unsigned BitSize, OrrOpc, ZeroReg;
4049  if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4050  OrrOpc = AArch64::ORRWri;
4051  OrrRC = &AArch64::GPR32spRegClass;
4052  BitSize = 32;
4053  ZeroReg = AArch64::WZR;
4054  Opc = AArch64::MADDWrrr;
4055  RC = &AArch64::GPR32RegClass;
4056  } else {
4057  OrrOpc = AArch64::ORRXri;
4058  OrrRC = &AArch64::GPR64spRegClass;
4059  BitSize = 64;
4060  ZeroReg = AArch64::XZR;
4061  Opc = AArch64::MADDXrrr;
4062  RC = &AArch64::GPR64RegClass;
4063  }
4064  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4065  uint64_t Imm = Root.getOperand(2).getImm();
4066  if (Root.getOperand(3).isImm()) {
4067  unsigned Val = Root.getOperand(3).getImm();
4068  Imm = Imm << Val;
4069  }
4070  uint64_t UImm = SignExtend64(-Imm, BitSize);
4071  uint64_t Encoding;
4072  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4073  MachineInstrBuilder MIB1 =
4074  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4075  .addReg(ZeroReg)
4076  .addImm(Encoding);
4077  InsInstrs.push_back(MIB1);
4078  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4079  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4080  }
4081  break;
4082  }
4083  // Floating Point Support
4086  // MUL I=A,B,0
4087  // ADD R,I,C
4088  // ==> MADD R,A,B,C
4089  // --- Create(MADD);
4090  if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4091  Opc = AArch64::FMADDSrrr;
4092  RC = &AArch64::FPR32RegClass;
4093  } else {
4094  Opc = AArch64::FMADDDrrr;
4095  RC = &AArch64::FPR64RegClass;
4096  }
4097  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4098  break;
4101  // FMUL I=A,B,0
4102  // FADD R,C,I
4103  // ==> FMADD R,A,B,C
4104  // --- Create(FMADD);
4105  if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4106  Opc = AArch64::FMADDSrrr;
4107  RC = &AArch64::FPR32RegClass;
4108  } else {
4109  Opc = AArch64::FMADDDrrr;
4110  RC = &AArch64::FPR64RegClass;
4111  }
4112  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4113  break;
4114 
4116  Opc = AArch64::FMLAv1i32_indexed;
4117  RC = &AArch64::FPR32RegClass;
4118  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4120  break;
4122  Opc = AArch64::FMLAv1i32_indexed;
4123  RC = &AArch64::FPR32RegClass;
4124  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4126  break;
4127 
4129  Opc = AArch64::FMLAv1i64_indexed;
4130  RC = &AArch64::FPR64RegClass;
4131  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4133  break;
4135  Opc = AArch64::FMLAv1i64_indexed;
4136  RC = &AArch64::FPR64RegClass;
4137  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4139  break;
4140 
4143  RC = &AArch64::FPR64RegClass;
4145  Opc = AArch64::FMLAv2i32_indexed;
4146  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4148  } else {
4149  Opc = AArch64::FMLAv2f32;
4150  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4152  }
4153  break;
4156  RC = &AArch64::FPR64RegClass;
4158  Opc = AArch64::FMLAv2i32_indexed;
4159  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4161  } else {
4162  Opc = AArch64::FMLAv2f32;
4163  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4165  }
4166  break;
4167 
4170  RC = &AArch64::FPR128RegClass;
4172  Opc = AArch64::FMLAv2i64_indexed;
4173  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4175  } else {
4176  Opc = AArch64::FMLAv2f64;
4177  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4179  }
4180  break;
4183  RC = &AArch64::FPR128RegClass;
4185  Opc = AArch64::FMLAv2i64_indexed;
4186  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4188  } else {
4189  Opc = AArch64::FMLAv2f64;
4190  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4192  }
4193  break;
4194 
4197  RC = &AArch64::FPR128RegClass;
4199  Opc = AArch64::FMLAv4i32_indexed;
4200  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4202  } else {
4203  Opc = AArch64::FMLAv4f32;
4204  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4206  }
4207  break;
4208 
4211  RC = &AArch64::FPR128RegClass;
4213  Opc = AArch64::FMLAv4i32_indexed;
4214  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4216  } else {
4217  Opc = AArch64::FMLAv4f32;
4218  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4220  }
4221  break;
4222 
4225  // FMUL I=A,B,0
4226  // FSUB R,I,C
4227  // ==> FNMSUB R,A,B,C // = -C + A*B
4228  // --- Create(FNMSUB);
4229  if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4230  Opc = AArch64::FNMSUBSrrr;
4231  RC = &AArch64::FPR32RegClass;
4232  } else {
4233  Opc = AArch64::FNMSUBDrrr;
4234  RC = &AArch64::FPR64RegClass;
4235  }
4236  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4237  break;
4238  }
4239 
4242  // FNMUL I=A,B,0
4243  // FSUB R,I,C
4244  // ==> FNMADD R,A,B,C // = -A*B - C
4245  // --- Create(FNMADD);
4246  if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4247  Opc = AArch64::FNMADDSrrr;
4248  RC = &AArch64::FPR32RegClass;
4249  } else {
4250  Opc = AArch64::FNMADDDrrr;
4251  RC = &AArch64::FPR64RegClass;
4252  }
4253  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4254  break;
4255  }
4256 
4259  // FMUL I=A,B,0
4260  // FSUB R,C,I
4261  // ==> FMSUB R,A,B,C (computes C - A*B)
4262  // --- Create(FMSUB);
4263  if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4264  Opc = AArch64::FMSUBSrrr;
4265  RC = &AArch64::FPR32RegClass;
4266  } else {
4267  Opc = AArch64::FMSUBDrrr;
4268  RC = &AArch64::FPR64RegClass;
4269  }
4270  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4271  break;
4272  }
4273 
4275  Opc = AArch64::FMLSv1i32_indexed;
4276  RC = &AArch64::FPR32RegClass;
4277  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4279  break;
4280 
4282  Opc = AArch64::FMLSv1i64_indexed;
4283  RC = &AArch64::FPR64RegClass;
4284  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4286  break;
4287 
4290  RC = &AArch64::FPR64RegClass;
4292  Opc = AArch64::FMLSv2i32_indexed;
4293  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4295  } else {
4296  Opc = AArch64::FMLSv2f32;
4297  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4299  }
4300  break;
4301 
4304  RC = &AArch64::FPR128RegClass;
4306  Opc = AArch64::FMLSv2i64_indexed;
4307  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4309  } else {
4310  Opc = AArch64::FMLSv2f64;
4311  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4313  }
4314  break;
4315 
4318  RC = &AArch64::FPR128RegClass;
4320  Opc = AArch64::FMLSv4i32_indexed;
4321  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4323  } else {
4324  Opc = AArch64::FMLSv4f32;
4325  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4327  }
4328  break;
4329  } // end switch (Pattern)
4330  // Record MUL and ADD/SUB for deletion
4331  DelInstrs.push_back(MUL);
4332  DelInstrs.push_back(&Root);
4333 }
4334 
4335 /// \brief Replace csinc-branch sequence by simple conditional branch
4336 ///
4337 /// Examples:
4338 /// 1. \code
4339 /// csinc w9, wzr, wzr, <condition code>
4340 /// tbnz w9, #0, 0x44
4341 /// \endcode
4342 /// to
4343 /// \code
4344 /// b.<inverted condition code>
4345 /// \endcode
4346 ///
4347 /// 2. \code
4348 /// csinc w9, wzr, wzr, <condition code>
4349 /// tbz w9, #0, 0x44
4350 /// \endcode
4351 /// to
4352 /// \code
4353 /// b.<condition code>
4354 /// \endcode
4355 ///
4356 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4357 /// compare's constant operand is a power of 2.
4358 ///
4359 /// Examples:
4360 /// \code
4361 /// and w8, w8, #0x400
4362 /// cbnz w8, L1
4363 /// \endcode
4364 /// to
4365 /// \code
4366 /// tbnz w8, #10, L1
4367 /// \endcode
4368 ///
4369 /// \param MI Conditional Branch
4370 /// \return True when the simple conditional branch is generated
4371 ///
4372 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4373  bool IsNegativeBranch = false;
4374  bool IsTestAndBranch = false;
4375  unsigned TargetBBInMI = 0;
4376  switch (MI.getOpcode()) {
4377  default:
4378  llvm_unreachable("Unknown branch instruction?");
4379  case AArch64::Bcc:
4380  return false;
4381  case AArch64::CBZW:
4382  case AArch64::CBZX:
4383  TargetBBInMI = 1;
4384  break;
4385  case AArch64::CBNZW:
4386  case AArch64::CBNZX:
4387  TargetBBInMI = 1;
4388  IsNegativeBranch = true;
4389  break;
4390  case AArch64::TBZW:
4391  case AArch64::TBZX:
4392  TargetBBInMI = 2;
4393  IsTestAndBranch = true;
4394  break;
4395  case AArch64::TBNZW:
4396  case AArch64::TBNZX:
4397  TargetBBInMI = 2;
4398  IsNegativeBranch = true;
4399  IsTestAndBranch = true;
4400  break;
4401  }
4402  // So we increment a zero register and test for bits other
4403  // than bit 0? Conservatively bail out in case the verifier
4404  // missed this case.
4405  if (IsTestAndBranch && MI.getOperand(1).getImm())
4406  return false;
4407 
4408  // Find Definition.
4409  assert(MI.getParent() && "Incomplete machine instruction\n");
4410  MachineBasicBlock *MBB = MI.getParent();
4411  MachineFunction *MF = MBB->getParent();
4412  MachineRegisterInfo *MRI = &MF->getRegInfo();
4413  unsigned VReg = MI.getOperand(0).getReg();
4414  if (!TargetRegisterInfo::isVirtualRegister(VReg))
4415  return false;
4416 
4417  MachineInstr *DefMI = MRI->getVRegDef(VReg);
4418 
4419  // Look through COPY instructions to find definition.
4420  while (DefMI->isCopy()) {
4421  unsigned CopyVReg = DefMI->getOperand(1).getReg();
4422  if (!MRI->hasOneNonDBGUse(CopyVReg))
4423  return false;
4424  if (!MRI->hasOneDef(CopyVReg))
4425  return false;
4426  DefMI = MRI->getVRegDef(CopyVReg);
4427  }
4428 
4429  switch (DefMI->getOpcode()) {
4430  default:
4431  return false;
4432  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4433  case AArch64::ANDWri:
4434  case AArch64::ANDXri: {
4435  if (IsTestAndBranch)
4436  return false;
4437  if (DefMI->getParent() != MBB)
4438  return false;
4439  if (!MRI->hasOneNonDBGUse(VReg))
4440  return false;
4441 
4442  bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4443  uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4444  DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4445  if (!isPowerOf2_64(Mask))
4446  return false;
4447 
4448  MachineOperand &MO = DefMI->getOperand(1);
4449  unsigned NewReg = MO.getReg();
4450  if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4451  return false;
4452 
4453  assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4454 
4455  MachineBasicBlock &RefToMBB = *MBB;
4456  MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4457  DebugLoc DL = MI.getDebugLoc();
4458  unsigned Imm = Log2_64(Mask);
4459  unsigned Opc = (Imm < 32)
4460  ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4461  : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4462  MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4463  .addReg(NewReg)
4464  .addImm(Imm)
4465  .addMBB(TBB);
4466  // Register lives on to the TBZ/TBNZ now.
4467  MO.setIsKill(false);
4468 
4469  // For immediates smaller than 32, we must use the 32-bit
4470  // variant (W) in all cases, because the 64-bit variant cannot
4471  // encode them.
4472  // Therefore, if the input register is 64-bit, we need to take its
4473  // 32-bit sub-register.
4474  if (!Is32Bit && Imm < 32)
4475  NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4476  MI.eraseFromParent();
4477  return true;
4478  }
4479  // Look for CSINC
4480  case AArch64::CSINCWr:
4481  case AArch64::CSINCXr: {
4482  if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4483  DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4484  !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4485  DefMI->getOperand(2).getReg() == AArch64::XZR))
4486  return false;
4487 
4488  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4489  return false;
4490 
4491  AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4492  // Convert only when the condition code is not modified between
4493  // the CSINC and the branch. The CC may be used by other
4494  // instructions in between.
4496  return false;
4497  MachineBasicBlock &RefToMBB = *MBB;
4498  MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4499  DebugLoc DL = MI.getDebugLoc();
4500  if (IsNegativeBranch)
4501  CC = AArch64CC::getInvertedCondCode(CC);
4502  BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4503  MI.eraseFromParent();
4504  return true;
4505  }
4506  }
4507 }
4508 
4509 std::pair<unsigned, unsigned>
4510 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4511  const unsigned Mask = AArch64II::MO_FRAGMENT;
4512  return std::make_pair(TF & Mask, TF & ~Mask);
4513 }
4514 
4515 ArrayRef<std::pair<unsigned, const char *>>
4516 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4517  using namespace AArch64II;
4518 
4519  static const std::pair<unsigned, const char *> TargetFlags[] = {
4520  {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4521  {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4522  {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
4523  {MO_HI12, "aarch64-hi12"}};
4524  return makeArrayRef(TargetFlags);
4525 }
4526 
4527 ArrayRef<std::pair<unsigned, const char *>>
4528 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4529  using namespace AArch64II;
4530 
4531  static const std::pair<unsigned, const char *> TargetFlags[] = {
4532  {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
4533  return makeArrayRef(TargetFlags);
4534 }
4535 
4536 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4537 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4538  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4539  {{MOSuppressPair, "aarch64-suppress-pair"},
4540  {MOStridedAccess, "aarch64-strided-access"}};
4541  return makeArrayRef(TargetFlags);
4542 }
4543 
4544 /// Constants defining how certain sequences should be outlined.
4545 /// This encompasses how an outlined function should be called, and what kind of
4546 /// frame should be emitted for that outlined function.
4547 ///
4548 /// \p MachineOutlinerDefault implies that the function should be called with
4549 /// a save and restore of LR to the stack.
4550 ///
4551 /// That is,
4552 ///
4553 /// I1 Save LR OUTLINED_FUNCTION:
4554 /// I2 --> BL OUTLINED_FUNCTION I1
4555 /// I3 Restore LR I2
4556 /// I3
4557 /// RET
4558 ///
4559 /// * Call construction overhead: 3 (save + BL + restore)
4560 /// * Frame construction overhead: 1 (ret)
4561 /// * Requires stack fixups? Yes
4562 ///
4563 /// \p MachineOutlinerTailCall implies that the function is being created from
4564 /// a sequence of instructions ending in a return.
4565 ///
4566 /// That is,
4567 ///
4568 /// I1 OUTLINED_FUNCTION:
4569 /// I2 --> B OUTLINED_FUNCTION I1
4570 /// RET I2
4571 /// RET
4572 ///
4573 /// * Call construction overhead: 1 (B)
4574 /// * Frame construction overhead: 0 (Return included in sequence)
4575 /// * Requires stack fixups? No
4576 ///
4577 /// \p MachineOutlinerNoLRSave implies that the function should be called using
4578 /// a BL instruction, but doesn't require LR to be saved and restored. This
4579 /// happens when LR is known to be dead.
4580 ///
4581 /// That is,
4582 ///
4583 /// I1 OUTLINED_FUNCTION:
4584 /// I2 --> BL OUTLINED_FUNCTION I1
4585 /// I3 I2
4586 /// I3
4587 /// RET
4588 ///
4589 /// * Call construction overhead: 1 (BL)
4590 /// * Frame construction overhead: 1 (RET)
4591 /// * Requires stack fixups? No
4592 ///
4593 enum MachineOutlinerClass {
4594  MachineOutlinerDefault, /// Emit a save, restore, call, and return.
4595  MachineOutlinerTailCall, /// Only emit a branch.
4596  MachineOutlinerNoLRSave /// Emit a call and return.
4597 };
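// For instance, a MachineOutlinerDefault call site roughly expands to
//   str x30, [sp, #-16]!   // save LR
//   bl  OUTLINED_FUNCTION_N
//   ldr x30, [sp], #16     // restore LR
// whereas the tail-call variant is just 'b OUTLINED_FUNCTION_N'.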
4598 
4599 bool AArch64InstrInfo::canOutlineWithoutLRSave(
4600  MachineBasicBlock::iterator &CallInsertionPt) const {
4601  // Was LR saved in the function containing this basic block?
4602  MachineBasicBlock &MBB = *(CallInsertionPt->getParent());
4603  LiveRegUnits LRU(getRegisterInfo());
4604  LRU.addLiveOuts(MBB);
4605 
4606  // Get liveness information from the end of the block to the end of the
4607  // prospective outlined region.
4608  std::for_each(MBB.rbegin(),
4609  (MachineBasicBlock::reverse_iterator)CallInsertionPt,
4610  [&LRU](MachineInstr &MI) {LRU.stepBackward(MI);}
4611  );
4612 
4613  // If the link register is available at this point, then we can safely outline
4614  // the region without saving/restoring LR. Otherwise, we must emit a save and
4615  // restore.
4616  return LRU.available(AArch64::LR);
4617 }
4618 
4619 AArch64GenInstrInfo::MachineOutlinerInfo
4620 AArch64InstrInfo::getOutlininingCandidateInfo(
4621  std::vector<
4622  std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
4623  &RepeatedSequenceLocs) const {
4624 
4625  unsigned CallID = MachineOutlinerDefault;
4626  unsigned FrameID = MachineOutlinerDefault;
4627  unsigned NumInstrsForCall = 3;
4628  unsigned NumInstrsToCreateFrame = 1;
4629 
4630  auto DoesntNeedLRSave =
4631  [this](std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>
4632  &I) { return canOutlineWithoutLRSave(I.second); };
4633 
4634  // If the last instruction in any candidate is a terminator, then we should
4635  // tail call all of the candidates.
4636  if (RepeatedSequenceLocs[0].second->isTerminator()) {
4637  CallID = MachineOutlinerTailCall;
4638  FrameID = MachineOutlinerTailCall;
4639  NumInstrsForCall = 1;
4640  NumInstrsToCreateFrame = 0;
4641  }
4642 
4643  else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4644  DoesntNeedLRSave)) {
4645  CallID = MachineOutlinerNoLRSave;
4646  FrameID = MachineOutlinerNoLRSave;
4647  NumInstrsForCall = 1;
4648  NumInstrsToCreateFrame = 1;
4649  }
4650 
4651  return MachineOutlinerInfo(NumInstrsForCall, NumInstrsToCreateFrame, CallID,
4652  FrameID);
4653 }
4654 
4655 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF,
4656  bool OutlineFromLinkOnceODRs) const {
4657  const Function *F = MF.getFunction();
4658 
4659  // If F uses a redzone, then don't outline from it because it might mess up
4660  // the stack.
4661  if (!F->hasFnAttribute(Attribute::NoRedZone))
4662  return false;
4663 
4664  // If anyone is using the address of this function, don't outline from it.
4665  if (F->hasAddressTaken())
4666  return false;
4667 
4668  // Can F be deduplicated by the linker? If it can, don't outline from it.
4669  if (!OutlineFromLinkOnceODRs && F->hasLinkOnceODRLinkage())
4670  return false;
4671 
4672  return true;
4673 }
4674 
4675 AArch64GenInstrInfo::MachineOutlinerInstrType
4676 AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
4677 
4678  MachineFunction *MF = MI.getParent()->getParent();
4679  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
4680 
4681  // Don't outline LOHs.
4682  if (FuncInfo->getLOHRelated().count(&MI))
4683  return MachineOutlinerInstrType::Illegal;
4684 
4685  // Don't allow debug values to impact outlining type.
4686  if (MI.isDebugValue() || MI.isIndirectDebugValue())
4687  return MachineOutlinerInstrType::Invisible;
4688 
4689  // Is this a terminator for a basic block?
4690  if (MI.isTerminator()) {
4691 
4692  // Is this the end of a function?
4693  if (MI.getParent()->succ_empty())
4694  return MachineOutlinerInstrType::Legal;
4695 
4696  // It's not, so don't outline it.
4697  return MachineOutlinerInstrType::Illegal;
4698  }
4699 
4700  // Don't outline positions.
4701  if (MI.isPosition())
4702  return MachineOutlinerInstrType::Illegal;
4703 
4704  // Don't touch the link register or W30.
4705  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
4706  MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
4707  return MachineOutlinerInstrType::Illegal;
4708 
4709  // Make sure none of the operands are un-outlinable.
4710  for (const MachineOperand &MOP : MI.operands()) {
4711  if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
4712  MOP.isTargetIndex())
4713  return MachineOutlinerInstrType::Illegal;
4714 
4715  // Don't outline anything that uses the link register.
4716  if (MOP.isReg() && getRegisterInfo().regsOverlap(MOP.getReg(), AArch64::LR))
4717  return MachineOutlinerInstrType::Illegal;
4718  }
4719 
4720  // Does this use the stack?
4721  if (MI.modifiesRegister(AArch64::SP, &RI) ||
4722  MI.readsRegister(AArch64::SP, &RI)) {
4723 
4724  // Is it a memory operation?
4725  if (MI.mayLoadOrStore()) {
4726  unsigned Base; // Filled with the base register of MI.
4727  int64_t Offset; // Filled with the offset of MI.
4728  unsigned DummyWidth;
4729 
4730  // Does it allow us to offset the base register and is the base SP?
4731  if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
4732  Base != AArch64::SP)
4733  return MachineOutlinerInstrType::Illegal;
4734 
4735  // Find the minimum/maximum offset for this instruction and check if
4736  // fixing it up would be in range.
4737  int64_t MinOffset, MaxOffset;
4738  unsigned DummyScale;
4739  getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset,
4740  MaxOffset);
4741 
4742  // TODO: We should really test what happens if an instruction overflows.
4743  // This is tricky to test with IR tests, but when the outliner is moved
4744  // to a MIR test, it really ought to be checked.
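// The +16 accounts for the 16-byte LR spill that the default outlined call
// sequence pushes, which shifts every SP-relative offset in the outlined body.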
4745  if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
4746  return MachineOutlinerInstrType::Illegal;
4747 
4748  // It's in range, so we can outline it.
4749  return MachineOutlinerInstrType::Legal;
4750  }
4751 
4752  // We can't fix it up, so don't outline it.
4753  return MachineOutlinerInstrType::Illegal;
4754  }
4755 
4756  return MachineOutlinerInstrType::Legal;
4757 }
4758 
4759 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
4760  for (MachineInstr &MI : MBB) {
4761  unsigned Base, Width;
4762  int64_t Offset;
4763 
4764  // Is this a load or store with an immediate offset with SP as the base?
4765  if (!MI.mayLoadOrStore() ||
4766  !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
4767  Base != AArch64::SP)
4768  continue;
4769 
4770  // It is, so we have to fix it up.
4771  unsigned Scale;
4772  int64_t Dummy1, Dummy2;
4773 
4774  MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
4775  assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
4776  getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
4777  assert(Scale != 0 && "Unexpected opcode!");
4778 
4779  // We've pushed the return address to the stack, so add 16 to the offset.
4780  // This is safe, since we already checked if it would overflow when we
4781  // checked if this instruction was legal to outline.
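// For example, 'ldr x0, [sp, #16]' (Scale = 8, Offset = 16) is rewritten to
// 'ldr x0, [sp, #32]': the scaled immediate becomes (16 + 16) / 8 = 4.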
4782  int64_t NewImm = (Offset + 16) / Scale;
4783  StackOffsetOperand.setImm(NewImm);
4784  }
4785 }
4786 
4787 void AArch64InstrInfo::insertOutlinerEpilogue(
4788  MachineBasicBlock &MBB, MachineFunction &MF,
4789  const MachineOutlinerInfo &MInfo) const {
4790 
4791  // If this is a tail call outlined function, then there's already a return.
4792  if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
4793  return;
4794 
4795  // It's not a tail call, so we have to insert the return ourselves.
4796  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
4797  .addReg(AArch64::LR, RegState::Undef);
4798  MBB.insert(MBB.end(), ret);
4799 
4800  // Did we have to modify the stack by saving the link register?
4801  if (MInfo.FrameConstructionID == MachineOutlinerNoLRSave)
4802  return;
4803 
4804  // We modified the stack.
4805  // Walk over the basic block and fix up all the stack accesses.
4806  fixupPostOutline(MBB);
4807 }
4808 
4809 void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
4810  MachineFunction &MF,
4811  const MachineOutlinerInfo &MInfo) const {}
4812 
4813 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
4814  Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
4815  MachineFunction &MF, const MachineOutlinerInfo &MInfo) const {
4816 
4817  // Are we tail calling?
4818  if (MInfo.CallConstructionID == MachineOutlinerTailCall) {
4819  // If yes, then we can just branch to the label.
4820  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B))
4821  .addGlobalAddress(M.getNamedValue(MF.getName())));
4822  return It;
4823  }
4824 
4825  // Are we saving the link register?
4826  if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) {
4827  // No, so just insert the call.
4828  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
4829  .addGlobalAddress(M.getNamedValue(MF.getName())));
4830  return It;
4831  }
4832 
4833  // We have a default call. Save the link register.
4834  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
4835  .addReg(AArch64::SP, RegState::Define)
4836  .addReg(AArch64::LR)
4837  .addReg(AArch64::SP)
4838  .addImm(-16);
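// This emits 'str x30, [sp, #-16]!': a pre-indexed store that saves LR and
// drops SP by 16 in a single instruction.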
4839  It = MBB.insert(It, STRXpre);
4840  It++;
4841 
4842  // Insert the call.
4843  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
4844  .addGlobalAddress(M.getNamedValue(MF.getName())));
4845 
4846  It++;
4847 
4848  // Restore the link register.
4849  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
4850  .addReg(AArch64::SP, RegState::Define)
4851  .addReg(AArch64::LR, RegState::Define)
4852  .addReg(AArch64::SP)
4853  .addImm(16);
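// This emits 'ldr x30, [sp], #16': a post-indexed load that restores LR and
// pops the 16-byte slot.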
4854  It = MBB.insert(It, LDRXpost);
4855 
4856  return It;
4857 }