1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "Utils/AArch64BaseInfo.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineMemOperand.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/StackMaps.h"
33 #include "llvm/CodeGen/TargetRegisterInfo.h"
34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
35 #include "llvm/IR/DebugLoc.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/MC/MCInst.h"
38 #include "llvm/MC/MCInstrDesc.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/CodeGen.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Compiler.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/Target/TargetMachine.h"
47 #include <cassert>
48 #include <cstdint>
49 #include <iterator>
50 #include <utility>
51 
52 using namespace llvm;
53 
54 #define GET_INSTRINFO_CTOR_DTOR
55 #include "AArch64GenInstrInfo.inc"
56 
57 static cl::opt<unsigned> TBZDisplacementBits(
58  "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
59  cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
60 
61 static cl::opt<unsigned> CBZDisplacementBits(
62  "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
63  cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
64 
65 static cl::opt<unsigned>
66  BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
67  cl::desc("Restrict range of Bcc instructions (DEBUG)"));
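// For reference (a summary, not from this file): both offsets are signed
// counts of 4-byte instructions, so the architectural reach modeled by the
// defaults above is roughly:
//   TB[N]Z : 14-bit offset => +/- 32 KiB
//   CB[N]Z : 19-bit offset => +/- 1 MiB
//   B.cc   : 19-bit offset => +/- 1 MiB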
68 
69 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
70  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
71  RI(STI.getTargetTriple()), Subtarget(STI) {}
72 
73 /// GetInstSize - Return the number of bytes of code the specified
74 /// instruction may be. This returns the maximum number of bytes.
75 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
76  const MachineBasicBlock &MBB = *MI.getParent();
77  const MachineFunction *MF = MBB.getParent();
78  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
79 
80  if (MI.getOpcode() == AArch64::INLINEASM)
81  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
82 
83  // FIXME: We currently only handle pseudoinstructions that don't get expanded
84  // before the assembly printer.
85  unsigned NumBytes = 0;
86  const MCInstrDesc &Desc = MI.getDesc();
87  switch (Desc.getOpcode()) {
88  default:
89  // Anything not explicitly designated otherwise is a normal 4-byte insn.
90  NumBytes = 4;
91  break;
92  case TargetOpcode::DBG_VALUE:
93  case TargetOpcode::EH_LABEL:
94  case TargetOpcode::IMPLICIT_DEF:
95  case TargetOpcode::KILL:
96  NumBytes = 0;
97  break;
98  case TargetOpcode::STACKMAP:
99  // The upper bound for a stackmap intrinsic is the full length of its shadow
100  NumBytes = StackMapOpers(&MI).getNumPatchBytes();
101  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
102  break;
103  case TargetOpcode::PATCHPOINT:
104  // The size of the patchpoint intrinsic is the number of bytes requested
105  NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
106  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
107  break;
108  case AArch64::TLSDESC_CALLSEQ:
109  // This gets lowered to an instruction sequence which takes 16 bytes
110  NumBytes = 16;
111  break;
112  }
113 
114  return NumBytes;
115 }
116 
117 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
118  SmallVectorImpl<MachineOperand> &Cond) {
119  // Block ends with fall-through condbranch.
120  switch (LastInst->getOpcode()) {
121  default:
122  llvm_unreachable("Unknown branch instruction?");
123  case AArch64::Bcc:
124  Target = LastInst->getOperand(1).getMBB();
125  Cond.push_back(LastInst->getOperand(0));
126  break;
127  case AArch64::CBZW:
128  case AArch64::CBZX:
129  case AArch64::CBNZW:
130  case AArch64::CBNZX:
131  Target = LastInst->getOperand(1).getMBB();
132  Cond.push_back(MachineOperand::CreateImm(-1));
133  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
134  Cond.push_back(LastInst->getOperand(0));
135  break;
136  case AArch64::TBZW:
137  case AArch64::TBZX:
138  case AArch64::TBNZW:
139  case AArch64::TBNZX:
140  Target = LastInst->getOperand(2).getMBB();
141  Cond.push_back(MachineOperand::CreateImm(-1));
142  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
143  Cond.push_back(LastInst->getOperand(0));
144  Cond.push_back(LastInst->getOperand(1));
145  }
146 }
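// Informal summary of the Cond encoding built above (it is what
// reverseBranchCondition, instantiateCondBranch and insertSelect consume):
//   size 1: [cc]                    for Bcc
//   size 3: [-1, opcode, reg]       for CB[N]Z
//   size 4: [-1, opcode, reg, bit]  for TB[N]Z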
147 
148 static unsigned getBranchDisplacementBits(unsigned Opc) {
149  switch (Opc) {
150  default:
151  llvm_unreachable("unexpected opcode!");
152  case AArch64::B:
153  return 64;
154  case AArch64::TBNZW:
155  case AArch64::TBZW:
156  case AArch64::TBNZX:
157  case AArch64::TBZX:
158  return TBZDisplacementBits;
159  case AArch64::CBNZW:
160  case AArch64::CBZW:
161  case AArch64::CBNZX:
162  case AArch64::CBZX:
163  return CBZDisplacementBits;
164  case AArch64::Bcc:
165  return BCCDisplacementBits;
166  }
167 }
168 
169 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
170  int64_t BrOffset) const {
171  unsigned Bits = getBranchDisplacementBits(BranchOp);
172  assert(Bits >= 3 && "max branch displacement must be enough to jump "
173  "over conditional branch expansion");
174  return isIntN(Bits, BrOffset / 4);
175 }
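// Illustrative check (not from this file): with the default 14-bit TB[N]Z
// range, a forward offset of 32764 bytes (8191 instructions) is still in
// range, while 32768 bytes (8192 instructions) is not and would have to be
// rewritten by the branch relaxation pass.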
176 
177 MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
178  const MachineInstr &MI) const {
179  switch (MI.getOpcode()) {
180  default:
181  llvm_unreachable("unexpected opcode!");
182  case AArch64::B:
183  return MI.getOperand(0).getMBB();
184  case AArch64::TBZW:
185  case AArch64::TBNZW:
186  case AArch64::TBZX:
187  case AArch64::TBNZX:
188  return MI.getOperand(2).getMBB();
189  case AArch64::CBZW:
190  case AArch64::CBNZW:
191  case AArch64::CBZX:
192  case AArch64::CBNZX:
193  case AArch64::Bcc:
194  return MI.getOperand(1).getMBB();
195  }
196 }
197 
198 // Branch analysis.
199 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
200  MachineBasicBlock *&TBB,
201  MachineBasicBlock *&FBB,
202  SmallVectorImpl<MachineOperand> &Cond,
203  bool AllowModify) const {
204  // If the block has no terminators, it just falls into the block after it.
205  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
206  if (I == MBB.end())
207  return false;
208 
209  if (!isUnpredicatedTerminator(*I))
210  return false;
211 
212  // Get the last instruction in the block.
213  MachineInstr *LastInst = &*I;
214 
215  // If there is only one terminator instruction, process it.
216  unsigned LastOpc = LastInst->getOpcode();
217  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
218  if (isUncondBranchOpcode(LastOpc)) {
219  TBB = LastInst->getOperand(0).getMBB();
220  return false;
221  }
222  if (isCondBranchOpcode(LastOpc)) {
223  // Block ends with fall-through condbranch.
224  parseCondBranch(LastInst, TBB, Cond);
225  return false;
226  }
227  return true; // Can't handle indirect branch.
228  }
229 
230  // Get the instruction before it if it is a terminator.
231  MachineInstr *SecondLastInst = &*I;
232  unsigned SecondLastOpc = SecondLastInst->getOpcode();
233 
234  // If AllowModify is true and the block ends with two or more unconditional
235  // branches, delete all but the first unconditional branch.
236  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
237  while (isUncondBranchOpcode(SecondLastOpc)) {
238  LastInst->eraseFromParent();
239  LastInst = SecondLastInst;
240  LastOpc = LastInst->getOpcode();
241  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
242  // Return now; the only terminator is an unconditional branch.
243  TBB = LastInst->getOperand(0).getMBB();
244  return false;
245  } else {
246  SecondLastInst = &*I;
247  SecondLastOpc = SecondLastInst->getOpcode();
248  }
249  }
250  }
251 
252  // If there are three terminators, we don't know what sort of block this is.
253  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
254  return true;
255 
256  // If the block ends with a B and a Bcc, handle it.
257  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
258  parseCondBranch(SecondLastInst, TBB, Cond);
259  FBB = LastInst->getOperand(0).getMBB();
260  return false;
261  }
262 
263  // If the block ends with two unconditional branches, handle it. The second
264  // one is not executed, so remove it.
265  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
266  TBB = SecondLastInst->getOperand(0).getMBB();
267  I = LastInst;
268  if (AllowModify)
269  I->eraseFromParent();
270  return false;
271  }
272 
273  // ...likewise if it ends with an indirect branch followed by an unconditional
274  // branch.
275  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
276  I = LastInst;
277  if (AllowModify)
278  I->eraseFromParent();
279  return true;
280  }
281 
282  // Otherwise, can't handle this.
283  return true;
284 }
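// Informal reminder of the analyzeBranch contract (see TargetInstrInfo.h):
// returning false means the terminators were understood and TBB/FBB/Cond
// describe them; returning true means the block could not be analyzed,
// e.g. because it ends in an indirect branch.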
285 
286 bool AArch64InstrInfo::reverseBranchCondition(
287  SmallVectorImpl<MachineOperand> &Cond) const {
288  if (Cond[0].getImm() != -1) {
289  // Regular Bcc
290  AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
291  Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
292  } else {
293  // Folded compare-and-branch
294  switch (Cond[1].getImm()) {
295  default:
296  llvm_unreachable("Unknown conditional branch!");
297  case AArch64::CBZW:
298  Cond[1].setImm(AArch64::CBNZW);
299  break;
300  case AArch64::CBNZW:
301  Cond[1].setImm(AArch64::CBZW);
302  break;
303  case AArch64::CBZX:
304  Cond[1].setImm(AArch64::CBNZX);
305  break;
306  case AArch64::CBNZX:
307  Cond[1].setImm(AArch64::CBZX);
308  break;
309  case AArch64::TBZW:
310  Cond[1].setImm(AArch64::TBNZW);
311  break;
312  case AArch64::TBNZW:
313  Cond[1].setImm(AArch64::TBZW);
314  break;
315  case AArch64::TBZX:
316  Cond[1].setImm(AArch64::TBNZX);
317  break;
318  case AArch64::TBNZX:
319  Cond[1].setImm(AArch64::TBZX);
320  break;
321  }
322  }
323 
324  return false;
325 }
326 
327 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
328  int *BytesRemoved) const {
329  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
330  if (I == MBB.end())
331  return 0;
332 
333  if (!isUncondBranchOpcode(I->getOpcode()) &&
334  !isCondBranchOpcode(I->getOpcode()))
335  return 0;
336 
337  // Remove the branch.
338  I->eraseFromParent();
339 
340  I = MBB.end();
341 
342  if (I == MBB.begin()) {
343  if (BytesRemoved)
344  *BytesRemoved = 4;
345  return 1;
346  }
347  --I;
348  if (!isCondBranchOpcode(I->getOpcode())) {
349  if (BytesRemoved)
350  *BytesRemoved = 4;
351  return 1;
352  }
353 
354  // Remove the branch.
355  I->eraseFromParent();
356  if (BytesRemoved)
357  *BytesRemoved = 8;
358 
359  return 2;
360 }
361 
362 void AArch64InstrInfo::instantiateCondBranch(
363  MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
364  ArrayRef<MachineOperand> Cond) const {
365  if (Cond[0].getImm() != -1) {
366  // Regular Bcc
367  BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
368  } else {
369  // Folded compare-and-branch
370  // Note that we use addOperand instead of addReg to keep the flags.
371  const MachineInstrBuilder MIB =
372  BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
373  if (Cond.size() > 3)
374  MIB.addImm(Cond[3].getImm());
375  MIB.addMBB(TBB);
376  }
377 }
378 
379 unsigned AArch64InstrInfo::insertBranch(
380  MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
381  ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
382  // Shouldn't be a fall through.
383  assert(TBB && "insertBranch must not be told to insert a fallthrough");
384 
385  if (!FBB) {
386  if (Cond.empty()) // Unconditional branch?
387  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
388  else
389  instantiateCondBranch(MBB, DL, TBB, Cond);
390 
391  if (BytesAdded)
392  *BytesAdded = 4;
393 
394  return 1;
395  }
396 
397  // Two-way conditional branch.
398  instantiateCondBranch(MBB, DL, TBB, Cond);
399  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
400 
401  if (BytesAdded)
402  *BytesAdded = 8;
403 
404  return 2;
405 }
406 
407 // Find the original register that VReg is copied from.
408 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
409  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
410  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
411  if (!DefMI->isFullCopy())
412  return VReg;
413  VReg = DefMI->getOperand(1).getReg();
414  }
415  return VReg;
416 }
417 
418 // Determine if VReg is defined by an instruction that can be folded into a
419 // csel instruction. If so, return the folded opcode, and the replacement
420 // register.
421 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
422  unsigned *NewVReg = nullptr) {
423  VReg = removeCopies(MRI, VReg);
424  if (!TargetRegisterInfo::isVirtualRegister(VReg))
425  return 0;
426 
427  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
428  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
429  unsigned Opc = 0;
430  unsigned SrcOpNum = 0;
431  switch (DefMI->getOpcode()) {
432  case AArch64::ADDSXri:
433  case AArch64::ADDSWri:
434  // if NZCV is used, do not fold.
435  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
436  return 0;
437  // fall-through to ADDXri and ADDWri.
438  LLVM_FALLTHROUGH;
439  case AArch64::ADDXri:
440  case AArch64::ADDWri:
441  // add x, 1 -> csinc.
442  if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
443  DefMI->getOperand(3).getImm() != 0)
444  return 0;
445  SrcOpNum = 1;
446  Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
447  break;
448 
449  case AArch64::ORNXrr:
450  case AArch64::ORNWrr: {
451  // not x -> csinv, represented as orn dst, xzr, src.
452  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
453  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
454  return 0;
455  SrcOpNum = 2;
456  Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
457  break;
458  }
459 
460  case AArch64::SUBSXrr:
461  case AArch64::SUBSWrr:
462  // if NZCV is used, do not fold.
463  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
464  return 0;
465  // fall-through to SUBXrr and SUBWrr.
466  LLVM_FALLTHROUGH;
467  case AArch64::SUBXrr:
468  case AArch64::SUBWrr: {
469  // neg x -> csneg, represented as sub dst, xzr, src.
470  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
471  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
472  return 0;
473  SrcOpNum = 2;
474  Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
475  break;
476  }
477  default:
478  return 0;
479  }
480  assert(Opc && SrcOpNum && "Missing parameters");
481 
482  if (NewVReg)
483  *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
484  return Opc;
485 }
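// Illustrative example of the fold enabled here: selecting between (%a + 1)
// and %b under condition cc needs no separate ADD; insertSelect below emits
// "csinc dst, %b, %a, InvertedCC" instead.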
486 
487 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
488  ArrayRef<MachineOperand> Cond,
489  unsigned TrueReg, unsigned FalseReg,
490  int &CondCycles, int &TrueCycles,
491  int &FalseCycles) const {
492  // Check register classes.
493  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
494  const TargetRegisterClass *RC =
495  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
496  if (!RC)
497  return false;
498 
499  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
500  unsigned ExtraCondLat = Cond.size() != 1;
501 
502  // GPRs are handled by csel.
503  // FIXME: Fold in x+1, -x, and ~x when applicable.
504  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
505  AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
506  // Single-cycle csel, csinc, csinv, and csneg.
507  CondCycles = 1 + ExtraCondLat;
508  TrueCycles = FalseCycles = 1;
509  if (canFoldIntoCSel(MRI, TrueReg))
510  TrueCycles = 0;
511  else if (canFoldIntoCSel(MRI, FalseReg))
512  FalseCycles = 0;
513  return true;
514  }
515 
516  // Scalar floating point is handled by fcsel.
517  // FIXME: Form fabs, fmin, and fmax when applicable.
518  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
519  AArch64::FPR32RegClass.hasSubClassEq(RC)) {
520  CondCycles = 5 + ExtraCondLat;
521  TrueCycles = FalseCycles = 2;
522  return true;
523  }
524 
525  // Can't do vectors.
526  return false;
527 }
528 
529 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
530  MachineBasicBlock::iterator I,
531  const DebugLoc &DL, unsigned DstReg,
532  ArrayRef<MachineOperand> Cond,
533  unsigned TrueReg, unsigned FalseReg) const {
534  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
535 
536  // Parse the condition code, see parseCondBranch() above.
537  AArch64CC::CondCode CC;
538  switch (Cond.size()) {
539  default:
540  llvm_unreachable("Unknown condition opcode in Cond");
541  case 1: // b.cc
542  CC = AArch64CC::CondCode(Cond[0].getImm());
543  break;
544  case 3: { // cbz/cbnz
545  // We must insert a compare against 0.
546  bool Is64Bit;
547  switch (Cond[1].getImm()) {
548  default:
549  llvm_unreachable("Unknown branch opcode in Cond");
550  case AArch64::CBZW:
551  Is64Bit = false;
552  CC = AArch64CC::EQ;
553  break;
554  case AArch64::CBZX:
555  Is64Bit = true;
556  CC = AArch64CC::EQ;
557  break;
558  case AArch64::CBNZW:
559  Is64Bit = false;
560  CC = AArch64CC::NE;
561  break;
562  case AArch64::CBNZX:
563  Is64Bit = true;
564  CC = AArch64CC::NE;
565  break;
566  }
567  unsigned SrcReg = Cond[2].getReg();
568  if (Is64Bit) {
569  // cmp reg, #0 is actually subs xzr, reg, #0.
570  MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
571  BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
572  .addReg(SrcReg)
573  .addImm(0)
574  .addImm(0);
575  } else {
576  MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
577  BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
578  .addReg(SrcReg)
579  .addImm(0)
580  .addImm(0);
581  }
582  break;
583  }
584  case 4: { // tbz/tbnz
585  // We must insert a tst instruction.
586  switch (Cond[1].getImm()) {
587  default:
588  llvm_unreachable("Unknown branch opcode in Cond");
589  case AArch64::TBZW:
590  case AArch64::TBZX:
591  CC = AArch64CC::EQ;
592  break;
593  case AArch64::TBNZW:
594  case AArch64::TBNZX:
595  CC = AArch64CC::NE;
596  break;
597  }
598  // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
599  if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
600  BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
601  .addReg(Cond[2].getReg())
602  .addImm(
603  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
604  else
605  BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
606  .addReg(Cond[2].getReg())
607  .addImm(
608  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
609  break;
610  }
611  }
612 
613  unsigned Opc = 0;
614  const TargetRegisterClass *RC = nullptr;
615  bool TryFold = false;
616  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
617  RC = &AArch64::GPR64RegClass;
618  Opc = AArch64::CSELXr;
619  TryFold = true;
620  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
621  RC = &AArch64::GPR32RegClass;
622  Opc = AArch64::CSELWr;
623  TryFold = true;
624  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
625  RC = &AArch64::FPR64RegClass;
626  Opc = AArch64::FCSELDrrr;
627  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
628  RC = &AArch64::FPR32RegClass;
629  Opc = AArch64::FCSELSrrr;
630  }
631  assert(RC && "Unsupported regclass");
632 
633  // Try folding simple instructions into the csel.
634  if (TryFold) {
635  unsigned NewVReg = 0;
636  unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
637  if (FoldedOpc) {
638  // The folded opcodes csinc, csinv and csneg apply the operation to
639  // FalseReg, so we need to invert the condition.
640  CC = AArch64CC::getInvertedCondCode(CC);
641  TrueReg = FalseReg;
642  } else
643  FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
644 
645  // Fold the operation. Leave any dead instructions for DCE to clean up.
646  if (FoldedOpc) {
647  FalseReg = NewVReg;
648  Opc = FoldedOpc;
649  // This extends the live range of NewVReg.
650  MRI.clearKillFlags(NewVReg);
651  }
652  }
653 
654  // Pull all virtual registers into the appropriate class.
655  MRI.constrainRegClass(TrueReg, RC);
656  MRI.constrainRegClass(FalseReg, RC);
657 
658  // Insert the csel.
659  BuildMI(MBB, I, DL, get(Opc), DstReg)
660  .addReg(TrueReg)
661  .addReg(FalseReg)
662  .addImm(CC);
663 }
664 
665 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
666 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
667  uint64_t Imm = MI.getOperand(1).getImm();
668  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
669  uint64_t Encoding;
670  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
671 }
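// Example (illustrative): 0x00ff00ff is a repeating bit pattern and therefore
// a valid logical immediate, so a MOVi32imm of it can become a single
// "orr wN, wzr, #0x00ff00ff"; 0x12345678 is not encodable and still needs a
// MOVZ/MOVK sequence.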
672 
673 // FIXME: this implementation should be micro-architecture dependent, so a
674 // micro-architecture target hook should be introduced here in future.
675 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
676  if (!Subtarget.hasCustomCheapAsMoveHandling())
677  return MI.isAsCheapAsAMove();
678  if (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
680  return true;
681 
682  switch (MI.getOpcode()) {
683  default:
684  return false;
685 
686  // add/sub on register without shift
687  case AArch64::ADDWri:
688  case AArch64::ADDXri:
689  case AArch64::SUBWri:
690  case AArch64::SUBXri:
691  return (MI.getOperand(3).getImm() == 0);
692 
693  // logical ops on immediate
694  case AArch64::ANDWri:
695  case AArch64::ANDXri:
696  case AArch64::EORWri:
697  case AArch64::EORXri:
698  case AArch64::ORRWri:
699  case AArch64::ORRXri:
700  return true;
701 
702  // logical ops on register without shift
703  case AArch64::ANDWrr:
704  case AArch64::ANDXrr:
705  case AArch64::BICWrr:
706  case AArch64::BICXrr:
707  case AArch64::EONWrr:
708  case AArch64::EONXrr:
709  case AArch64::EORWrr:
710  case AArch64::EORXrr:
711  case AArch64::ORNWrr:
712  case AArch64::ORNXrr:
713  case AArch64::ORRWrr:
714  case AArch64::ORRXrr:
715  return true;
716 
717  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
718  // ORRXri, it is as cheap as MOV
719  case AArch64::MOVi32imm:
720  return canBeExpandedToORR(MI, 32);
721  case AArch64::MOVi64imm:
722  return canBeExpandedToORR(MI, 64);
723 
724  // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
725  // feature.
726  case AArch64::FMOVH0:
727  case AArch64::FMOVS0:
728  case AArch64::FMOVD0:
729  return Subtarget.hasZeroCycleZeroing();
730  case TargetOpcode::COPY:
731  return (Subtarget.hasZeroCycleZeroing() &&
732  (MI.getOperand(1).getReg() == AArch64::WZR ||
733  MI.getOperand(1).getReg() == AArch64::XZR));
734  }
735 
736  llvm_unreachable("Unknown opcode to check as cheap as a move!");
737 }
738 
739 bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
740  unsigned Imm, Shift;
741  AArch64_AM::ShiftExtendType Ext;
742 
743  switch (MI.getOpcode()) {
744  default:
745  return false;
746 
747  // WriteI
748  case AArch64::ADDSWri:
749  case AArch64::ADDSXri:
750  case AArch64::ADDWri:
751  case AArch64::ADDXri:
752  case AArch64::SUBSWri:
753  case AArch64::SUBSXri:
754  case AArch64::SUBWri:
755  case AArch64::SUBXri:
756  return true;
757 
758  // WriteISReg
759  case AArch64::ADDSWrs:
760  case AArch64::ADDSXrs:
761  case AArch64::ADDWrs:
762  case AArch64::ADDXrs:
763  case AArch64::ANDSWrs:
764  case AArch64::ANDSXrs:
765  case AArch64::ANDWrs:
766  case AArch64::ANDXrs:
767  case AArch64::BICSWrs:
768  case AArch64::BICSXrs:
769  case AArch64::BICWrs:
770  case AArch64::BICXrs:
771  case AArch64::EONWrs:
772  case AArch64::EONXrs:
773  case AArch64::EORWrs:
774  case AArch64::EORXrs:
775  case AArch64::ORNWrs:
776  case AArch64::ORNXrs:
777  case AArch64::ORRWrs:
778  case AArch64::ORRXrs:
779  case AArch64::SUBSWrs:
780  case AArch64::SUBSXrs:
781  case AArch64::SUBWrs:
782  case AArch64::SUBXrs:
783  Imm = MI.getOperand(3).getImm();
784  Shift = AArch64_AM::getShiftValue(Imm);
785  Ext = AArch64_AM::getShiftType(Imm);
786  return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
787 
788  // WriteIEReg
789  case AArch64::ADDSWrx:
790  case AArch64::ADDSXrx:
791  case AArch64::ADDSXrx64:
792  case AArch64::ADDWrx:
793  case AArch64::ADDXrx:
794  case AArch64::ADDXrx64:
795  case AArch64::SUBSWrx:
796  case AArch64::SUBSXrx:
797  case AArch64::SUBSXrx64:
798  case AArch64::SUBWrx:
799  case AArch64::SUBXrx:
800  case AArch64::SUBXrx64:
801  Imm = MI.getOperand(3).getImm();
802  Shift = AArch64_AM::getArithShiftValue(Imm);
803  Ext = AArch64_AM::getArithExtendType(Imm);
804  return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
805 
806  case AArch64::PRFMroW:
807  case AArch64::PRFMroX:
808 
809  // WriteLDIdx
810  case AArch64::LDRBBroW:
811  case AArch64::LDRBBroX:
812  case AArch64::LDRHHroW:
813  case AArch64::LDRHHroX:
814  case AArch64::LDRSBWroW:
815  case AArch64::LDRSBWroX:
816  case AArch64::LDRSBXroW:
817  case AArch64::LDRSBXroX:
818  case AArch64::LDRSHWroW:
819  case AArch64::LDRSHWroX:
820  case AArch64::LDRSHXroW:
821  case AArch64::LDRSHXroX:
822  case AArch64::LDRSWroW:
823  case AArch64::LDRSWroX:
824  case AArch64::LDRWroW:
825  case AArch64::LDRWroX:
826  case AArch64::LDRXroW:
827  case AArch64::LDRXroX:
828 
829  case AArch64::LDRBroW:
830  case AArch64::LDRBroX:
831  case AArch64::LDRDroW:
832  case AArch64::LDRDroX:
833  case AArch64::LDRHroW:
834  case AArch64::LDRHroX:
835  case AArch64::LDRSroW:
836  case AArch64::LDRSroX:
837 
838  // WriteSTIdx
839  case AArch64::STRBBroW:
840  case AArch64::STRBBroX:
841  case AArch64::STRHHroW:
842  case AArch64::STRHHroX:
843  case AArch64::STRWroW:
844  case AArch64::STRWroX:
845  case AArch64::STRXroW:
846  case AArch64::STRXroX:
847 
848  case AArch64::STRBroW:
849  case AArch64::STRBroX:
850  case AArch64::STRDroW:
851  case AArch64::STRDroX:
852  case AArch64::STRHroW:
853  case AArch64::STRHroX:
854  case AArch64::STRSroW:
855  case AArch64::STRSroX:
856  Imm = MI.getOperand(3).getImm();
857  Ext = AArch64_AM::getMemExtendType(Imm);
858  return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
859  }
860 }
861 
862 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
863  switch (MI.getOpcode()) {
864  default:
865  return false;
866 
867  case AArch64::ADDWrs:
868  case AArch64::ADDXrs:
869  case AArch64::ADDSWrs:
870  case AArch64::ADDSXrs: {
871  unsigned Imm = MI.getOperand(3).getImm();
872  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
873  if (ShiftVal == 0)
874  return true;
875  return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
876  }
877 
878  case AArch64::ADDWrx:
879  case AArch64::ADDXrx:
880  case AArch64::ADDXrx64:
881  case AArch64::ADDSWrx:
882  case AArch64::ADDSXrx:
883  case AArch64::ADDSXrx64: {
884  unsigned Imm = MI.getOperand(3).getImm();
885  switch (AArch64_AM::getArithExtendType(Imm)) {
886  default:
887  return false;
888  case AArch64_AM::UXTB:
889  case AArch64_AM::UXTH:
890  case AArch64_AM::UXTW:
891  case AArch64_AM::UXTX:
892  return AArch64_AM::getArithShiftValue(Imm) <= 4;
893  }
894  }
895 
896  case AArch64::SUBWrs:
897  case AArch64::SUBSWrs: {
898  unsigned Imm = MI.getOperand(3).getImm();
899  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
900  return ShiftVal == 0 ||
901  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
902  }
903 
904  case AArch64::SUBXrs:
905  case AArch64::SUBSXrs: {
906  unsigned Imm = MI.getOperand(3).getImm();
907  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
908  return ShiftVal == 0 ||
909  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
910  }
911 
912  case AArch64::SUBWrx:
913  case AArch64::SUBXrx:
914  case AArch64::SUBXrx64:
915  case AArch64::SUBSWrx:
916  case AArch64::SUBSXrx:
917  case AArch64::SUBSXrx64: {
918  unsigned Imm = MI.getOperand(3).getImm();
919  switch (AArch64_AM::getArithExtendType(Imm)) {
920  default:
921  return false;
922  case AArch64_AM::UXTB:
923  case AArch64_AM::UXTH:
924  case AArch64_AM::UXTW:
925  case AArch64_AM::UXTX:
926  return AArch64_AM::getArithShiftValue(Imm) == 0;
927  }
928  }
929 
930  case AArch64::LDRBBroW:
931  case AArch64::LDRBBroX:
932  case AArch64::LDRBroW:
933  case AArch64::LDRBroX:
934  case AArch64::LDRDroW:
935  case AArch64::LDRDroX:
936  case AArch64::LDRHHroW:
937  case AArch64::LDRHHroX:
938  case AArch64::LDRHroW:
939  case AArch64::LDRHroX:
940  case AArch64::LDRQroW:
941  case AArch64::LDRQroX:
942  case AArch64::LDRSBWroW:
943  case AArch64::LDRSBWroX:
944  case AArch64::LDRSBXroW:
945  case AArch64::LDRSBXroX:
946  case AArch64::LDRSHWroW:
947  case AArch64::LDRSHWroX:
948  case AArch64::LDRSHXroW:
949  case AArch64::LDRSHXroX:
950  case AArch64::LDRSWroW:
951  case AArch64::LDRSWroX:
952  case AArch64::LDRSroW:
953  case AArch64::LDRSroX:
954  case AArch64::LDRWroW:
955  case AArch64::LDRWroX:
956  case AArch64::LDRXroW:
957  case AArch64::LDRXroX:
958  case AArch64::PRFMroW:
959  case AArch64::PRFMroX:
960  case AArch64::STRBBroW:
961  case AArch64::STRBBroX:
962  case AArch64::STRBroW:
963  case AArch64::STRBroX:
964  case AArch64::STRDroW:
965  case AArch64::STRDroX:
966  case AArch64::STRHHroW:
967  case AArch64::STRHHroX:
968  case AArch64::STRHroW:
969  case AArch64::STRHroX:
970  case AArch64::STRQroW:
971  case AArch64::STRQroX:
972  case AArch64::STRSroW:
973  case AArch64::STRSroX:
974  case AArch64::STRWroW:
975  case AArch64::STRWroX:
976  case AArch64::STRXroW:
977  case AArch64::STRXroX: {
978  unsigned IsSigned = MI.getOperand(3).getImm();
979  return !IsSigned;
980  }
981  }
982 }
983 
984 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
985  unsigned &SrcReg, unsigned &DstReg,
986  unsigned &SubIdx) const {
987  switch (MI.getOpcode()) {
988  default:
989  return false;
990  case AArch64::SBFMXri: // aka sxtw
991  case AArch64::UBFMXri: // aka uxtw
992  // Check for the 32 -> 64 bit extension case, these instructions can do
993  // much more.
994  if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
995  return false;
996  // This is a signed or unsigned 32 -> 64 bit extension.
997  SrcReg = MI.getOperand(1).getReg();
998  DstReg = MI.getOperand(0).getReg();
999  SubIdx = AArch64::sub_32;
1000  return true;
1001  }
1002 }
1003 
1004 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1005  MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
1006  const TargetRegisterInfo *TRI = &getRegisterInfo();
1007  unsigned BaseRegA = 0, BaseRegB = 0;
1008  int64_t OffsetA = 0, OffsetB = 0;
1009  unsigned WidthA = 0, WidthB = 0;
1010 
1011  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1012  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1013 
1014  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1015  MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1016  return false;
1017 
1018  // Retrieve the base register, offset from the base register and width. Width
1019  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1020  // base registers are identical, and the offset of a lower memory access +
1021  // the width doesn't overlap the offset of a higher memory access,
1022  // then the memory accesses are different.
1023  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
1024  getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
1025  if (BaseRegA == BaseRegB) {
1026  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1027  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1028  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1029  if (LowOffset + LowWidth <= HighOffset)
1030  return true;
1031  }
1032  }
1033  return false;
1034 }
1035 
1036 /// analyzeCompare - For a comparison instruction, return the source registers
1037 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1038 /// Return true if the comparison instruction can be analyzed.
1039 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
1040  unsigned &SrcReg2, int &CmpMask,
1041  int &CmpValue) const {
1042  // The first operand can be a frame index where we'd normally expect a
1043  // register.
1044  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1045  if (!MI.getOperand(1).isReg())
1046  return false;
1047 
1048  switch (MI.getOpcode()) {
1049  default:
1050  break;
1051  case AArch64::SUBSWrr:
1052  case AArch64::SUBSWrs:
1053  case AArch64::SUBSWrx:
1054  case AArch64::SUBSXrr:
1055  case AArch64::SUBSXrs:
1056  case AArch64::SUBSXrx:
1057  case AArch64::ADDSWrr:
1058  case AArch64::ADDSWrs:
1059  case AArch64::ADDSWrx:
1060  case AArch64::ADDSXrr:
1061  case AArch64::ADDSXrs:
1062  case AArch64::ADDSXrx:
1063  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1064  SrcReg = MI.getOperand(1).getReg();
1065  SrcReg2 = MI.getOperand(2).getReg();
1066  CmpMask = ~0;
1067  CmpValue = 0;
1068  return true;
1069  case AArch64::SUBSWri:
1070  case AArch64::ADDSWri:
1071  case AArch64::SUBSXri:
1072  case AArch64::ADDSXri:
1073  SrcReg = MI.getOperand(1).getReg();
1074  SrcReg2 = 0;
1075  CmpMask = ~0;
1076  // FIXME: In order to convert CmpValue to 0 or 1
1077  CmpValue = MI.getOperand(2).getImm() != 0;
1078  return true;
1079  case AArch64::ANDSWri:
1080  case AArch64::ANDSXri:
1081  // ANDS does not use the same encoding scheme as the other xxxS
1082  // instructions.
1083  SrcReg = MI.getOperand(1).getReg();
1084  SrcReg2 = 0;
1085  CmpMask = ~0;
1086  // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
1087  // while the type of CmpValue is int. When converting uint64_t to int,
1088  // the high 32 bits of uint64_t will be lost.
1089  // In fact it causes a bug in spec2006-483.xalancbmk
1090  // CmpValue is only used to compare with zero in OptimizeCompareInstr
1091  CmpValue = AArch64_AM::decodeLogicalImmediate(
1092  MI.getOperand(2).getImm(),
1093  MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1094  return true;
1095  }
1096 
1097  return false;
1098 }
1099 
1100 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1101  MachineBasicBlock *MBB = Instr.getParent();
1102  assert(MBB && "Can't get MachineBasicBlock here");
1103  MachineFunction *MF = MBB->getParent();
1104  assert(MF && "Can't get MachineFunction here");
1105  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1106  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1107  MachineRegisterInfo *MRI = &MF->getRegInfo();
1108 
1109  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1110  ++OpIdx) {
1111  MachineOperand &MO = Instr.getOperand(OpIdx);
1112  const TargetRegisterClass *OpRegCstraints =
1113  Instr.getRegClassConstraint(OpIdx, TII, TRI);
1114 
1115  // If there's no constraint, there's nothing to do.
1116  if (!OpRegCstraints)
1117  continue;
1118  // If the operand is a frame index, there's nothing to do here.
1119  // A frame index operand will resolve correctly during PEI.
1120  if (MO.isFI())
1121  continue;
1122 
1123  assert(MO.isReg() &&
1124  "Operand has register constraints without being a register!");
1125 
1126  unsigned Reg = MO.getReg();
1127  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1128  if (!OpRegCstraints->contains(Reg))
1129  return false;
1130  } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1131  !MRI->constrainRegClass(Reg, OpRegCstraints))
1132  return false;
1133  }
1134 
1135  return true;
1136 }
1137 
1138 /// \brief Return the opcode that does not set flags when possible - otherwise
1139 /// return the original opcode. The caller is responsible to do the actual
1140 /// substitution and legality checking.
1141 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1142  // Don't convert all compare instructions, because for some the zero register
1143  // encoding becomes the sp register.
1144  bool MIDefinesZeroReg = false;
1145  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1146  MIDefinesZeroReg = true;
1147 
1148  switch (MI.getOpcode()) {
1149  default:
1150  return MI.getOpcode();
1151  case AArch64::ADDSWrr:
1152  return AArch64::ADDWrr;
1153  case AArch64::ADDSWri:
1154  return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1155  case AArch64::ADDSWrs:
1156  return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1157  case AArch64::ADDSWrx:
1158  return AArch64::ADDWrx;
1159  case AArch64::ADDSXrr:
1160  return AArch64::ADDXrr;
1161  case AArch64::ADDSXri:
1162  return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1163  case AArch64::ADDSXrs:
1164  return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1165  case AArch64::ADDSXrx:
1166  return AArch64::ADDXrx;
1167  case AArch64::SUBSWrr:
1168  return AArch64::SUBWrr;
1169  case AArch64::SUBSWri:
1170  return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1171  case AArch64::SUBSWrs:
1172  return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1173  case AArch64::SUBSWrx:
1174  return AArch64::SUBWrx;
1175  case AArch64::SUBSXrr:
1176  return AArch64::SUBXrr;
1177  case AArch64::SUBSXri:
1178  return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1179  case AArch64::SUBSXrs:
1180  return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1181  case AArch64::SUBSXrx:
1182  return AArch64::SUBXrx;
1183  }
1184 }
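// Illustrative case for the MIDefinesZeroReg check above: a compare such as
// "subs wzr, w1, #5" must keep its S form, because in the non-flag-setting
// "sub" encoding register 31 means SP rather than WZR; "subs w0, w1, #5",
// whose destination is a normal GPR, can safely become "sub w0, w1, #5".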
1185 
1186 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1187 
1188 /// True when condition flags are accessed (either by writing or reading)
1189 /// on the instruction trace starting at From and ending at To.
1190 ///
1191 /// Note: If From and To are from different blocks it's assumed CC are accessed
1192 /// on the path.
1193 static bool areCFlagsAccessedBetweenInstrs(
1194  MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1195  const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1196  // Early exit if To is at the beginning of the BB.
1197  if (To == To->getParent()->begin())
1198  return true;
1199 
1200  // Check whether the instructions are in the same basic block
1201  // If not, assume the condition flags might get modified somewhere.
1202  if (To->getParent() != From->getParent())
1203  return true;
1204 
1205  // From must be above To.
1206  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1207  [From](MachineInstr &MI) {
1208  return MI.getIterator() == From;
1209  }) != To->getParent()->rend());
1210 
1211  // We iterate backward starting \p To until we hit \p From.
1212  for (--To; To != From; --To) {
1213  const MachineInstr &Instr = *To;
1214 
1215  if (((AccessToCheck & AK_Write) &&
1216  Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1217  ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1218  return true;
1219  }
1220  return false;
1221 }
1222 
1223 /// Try to optimize a compare instruction. A compare instruction is an
1224 /// instruction which produces AArch64::NZCV. It is a true compare
1225 /// instruction
1226 /// only when there are no uses of its destination register.
1227 ///
1228 /// The following steps are tried in order:
1229 /// 1. Convert CmpInstr into an unconditional version.
1230 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1231 /// condition code or an instruction which can be converted into such an
1232 /// instruction.
1233 /// Only comparison with zero is supported.
1234 bool AArch64InstrInfo::optimizeCompareInstr(
1235  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1236  int CmpValue, const MachineRegisterInfo *MRI) const {
1237  assert(CmpInstr.getParent());
1238  assert(MRI);
1239 
1240  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1241  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1242  if (DeadNZCVIdx != -1) {
1243  if (CmpInstr.definesRegister(AArch64::WZR) ||
1244  CmpInstr.definesRegister(AArch64::XZR)) {
1245  CmpInstr.eraseFromParent();
1246  return true;
1247  }
1248  unsigned Opc = CmpInstr.getOpcode();
1249  unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1250  if (NewOpc == Opc)
1251  return false;
1252  const MCInstrDesc &MCID = get(NewOpc);
1253  CmpInstr.setDesc(MCID);
1254  CmpInstr.RemoveOperand(DeadNZCVIdx);
1255  bool succeeded = UpdateOperandRegClass(CmpInstr);
1256  (void)succeeded;
1257  assert(succeeded && "Some operands reg class are incompatible!");
1258  return true;
1259  }
1260 
1261  // Continue only if we have a "ri" where immediate is zero.
1262  // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1263  // function.
1264  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1265  if (CmpValue != 0 || SrcReg2 != 0)
1266  return false;
1267 
1268  // CmpInstr is a Compare instruction if destination register is not used.
1269  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1270  return false;
1271 
1272  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1273 }
1274 
1275 /// Get opcode of S version of Instr.
1276 /// If Instr is S version its opcode is returned.
1277 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1278 /// or we are not interested in it.
1279 static unsigned sForm(MachineInstr &Instr) {
1280  switch (Instr.getOpcode()) {
1281  default:
1282  return AArch64::INSTRUCTION_LIST_END;
1283 
1284  case AArch64::ADDSWrr:
1285  case AArch64::ADDSWri:
1286  case AArch64::ADDSXrr:
1287  case AArch64::ADDSXri:
1288  case AArch64::SUBSWrr:
1289  case AArch64::SUBSWri:
1290  case AArch64::SUBSXrr:
1291  case AArch64::SUBSXri:
1292  return Instr.getOpcode();
1293 
1294  case AArch64::ADDWrr:
1295  return AArch64::ADDSWrr;
1296  case AArch64::ADDWri:
1297  return AArch64::ADDSWri;
1298  case AArch64::ADDXrr:
1299  return AArch64::ADDSXrr;
1300  case AArch64::ADDXri:
1301  return AArch64::ADDSXri;
1302  case AArch64::ADCWr:
1303  return AArch64::ADCSWr;
1304  case AArch64::ADCXr:
1305  return AArch64::ADCSXr;
1306  case AArch64::SUBWrr:
1307  return AArch64::SUBSWrr;
1308  case AArch64::SUBWri:
1309  return AArch64::SUBSWri;
1310  case AArch64::SUBXrr:
1311  return AArch64::SUBSXrr;
1312  case AArch64::SUBXri:
1313  return AArch64::SUBSXri;
1314  case AArch64::SBCWr:
1315  return AArch64::SBCSWr;
1316  case AArch64::SBCXr:
1317  return AArch64::SBCSXr;
1318  case AArch64::ANDWri:
1319  return AArch64::ANDSWri;
1320  case AArch64::ANDXri:
1321  return AArch64::ANDSXri;
1322  }
1323 }
1324 
1325 /// Check if AArch64::NZCV should be alive in successors of MBB.
1326 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1327  for (auto *BB : MBB->successors())
1328  if (BB->isLiveIn(AArch64::NZCV))
1329  return true;
1330  return false;
1331 }
1332 
1333 namespace {
1334 
1335 struct UsedNZCV {
1336  bool N = false;
1337  bool Z = false;
1338  bool C = false;
1339  bool V = false;
1340 
1341  UsedNZCV() = default;
1342 
1343  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1344  this->N |= UsedFlags.N;
1345  this->Z |= UsedFlags.Z;
1346  this->C |= UsedFlags.C;
1347  this->V |= UsedFlags.V;
1348  return *this;
1349  }
1350 };
1351 
1352 } // end anonymous namespace
1353 
1354 /// Find a condition code used by the instruction.
1355 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1356 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1357 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1358  switch (Instr.getOpcode()) {
1359  default:
1360  return AArch64CC::Invalid;
1361 
1362  case AArch64::Bcc: {
1363  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1364  assert(Idx >= 2);
1365  return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1366  }
1367 
1368  case AArch64::CSINVWr:
1369  case AArch64::CSINVXr:
1370  case AArch64::CSINCWr:
1371  case AArch64::CSINCXr:
1372  case AArch64::CSELWr:
1373  case AArch64::CSELXr:
1374  case AArch64::CSNEGWr:
1375  case AArch64::CSNEGXr:
1376  case AArch64::FCSELSrrr:
1377  case AArch64::FCSELDrrr: {
1378  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1379  assert(Idx >= 1);
1380  return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1381  }
1382  }
1383 }
1384 
1385 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1386  assert(CC != AArch64CC::Invalid);
1387  UsedNZCV UsedFlags;
1388  switch (CC) {
1389  default:
1390  break;
1391 
1392  case AArch64CC::EQ: // Z set
1393  case AArch64CC::NE: // Z clear
1394  UsedFlags.Z = true;
1395  break;
1396 
1397  case AArch64CC::HI: // Z clear and C set
1398  case AArch64CC::LS: // Z set or C clear
1399  UsedFlags.Z = true;
1400  LLVM_FALLTHROUGH;
1401  case AArch64CC::HS: // C set
1402  case AArch64CC::LO: // C clear
1403  UsedFlags.C = true;
1404  break;
1405 
1406  case AArch64CC::MI: // N set
1407  case AArch64CC::PL: // N clear
1408  UsedFlags.N = true;
1409  break;
1410 
1411  case AArch64CC::VS: // V set
1412  case AArch64CC::VC: // V clear
1413  UsedFlags.V = true;
1414  break;
1415 
1416  case AArch64CC::GT: // Z clear, N and V the same
1417  case AArch64CC::LE: // Z set, N and V differ
1418  UsedFlags.Z = true;
1419  LLVM_FALLTHROUGH;
1420  case AArch64CC::GE: // N and V the same
1421  case AArch64CC::LT: // N and V differ
1422  UsedFlags.N = true;
1423  UsedFlags.V = true;
1424  break;
1425  }
1426  return UsedFlags;
1427 }
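// Worked example of how this table is used below: a compare followed only by
// "b.eq" reads just Z, so canInstrSubstituteCmpInstr may accept it, whereas a
// following "b.hs" reads C and makes the optimization bail out.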
1428 
1429 static bool isADDSRegImm(unsigned Opcode) {
1430  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1431 }
1432 
1433 static bool isSUBSRegImm(unsigned Opcode) {
1434  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1435 }
1436 
1437 /// Check if CmpInstr can be substituted by MI.
1438 ///
1439 /// CmpInstr can be substituted:
1440 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1441 /// - and, MI and CmpInstr are from the same MachineBB
1442 /// - and, condition flags are not alive in successors of the CmpInstr parent
1443 /// - and, if MI opcode is the S form there must be no defs of flags between
1444 /// MI and CmpInstr
1445 /// or if MI opcode is not the S form there must be neither defs of flags
1446 /// nor uses of flags between MI and CmpInstr.
1447 /// - and C/V flags are not used after CmpInstr
1448 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1449  const TargetRegisterInfo *TRI) {
1450  assert(MI);
1451  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1452  assert(CmpInstr);
1453 
1454  const unsigned CmpOpcode = CmpInstr->getOpcode();
1455  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1456  return false;
1457 
1458  if (MI->getParent() != CmpInstr->getParent())
1459  return false;
1460 
1461  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1462  return false;
1463 
1464  AccessKind AccessToCheck = AK_Write;
1465  if (sForm(*MI) != MI->getOpcode())
1466  AccessToCheck = AK_All;
1467  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1468  return false;
1469 
1470  UsedNZCV NZCVUsedAfterCmp;
1471  for (auto I = std::next(CmpInstr->getIterator()),
1472  E = CmpInstr->getParent()->instr_end();
1473  I != E; ++I) {
1474  const MachineInstr &Instr = *I;
1475  if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1476  AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1477  if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1478  return false;
1479  NZCVUsedAfterCmp |= getUsedNZCV(CC);
1480  }
1481 
1482  if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1483  break;
1484  }
1485 
1486  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1487 }
1488 
1489 /// Substitute an instruction comparing to zero with another instruction
1490 /// which produces needed condition flags.
1491 ///
1492 /// Return true on success.
1493 bool AArch64InstrInfo::substituteCmpToZero(
1494  MachineInstr &CmpInstr, unsigned SrcReg,
1495  const MachineRegisterInfo *MRI) const {
1496  assert(MRI);
1497  // Get the unique definition of SrcReg.
1498  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1499  if (!MI)
1500  return false;
1501 
1502  const TargetRegisterInfo *TRI = &getRegisterInfo();
1503 
1504  unsigned NewOpc = sForm(*MI);
1505  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1506  return false;
1507 
1508  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1509  return false;
1510 
1511  // Update the instruction to set NZCV.
1512  MI->setDesc(get(NewOpc));
1513  CmpInstr.eraseFromParent();
1514  bool succeeded = UpdateOperandRegClass(*MI);
1515  (void)succeeded;
1516  assert(succeeded && "Some operands reg class are incompatible!");
1517  MI->addRegisterDefined(AArch64::NZCV, TRI);
1518  return true;
1519 }
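// Schematic before/after (register names invented for illustration):
//   %1 = SUBWri %0, 1, 0          ; %1 = %0 - 1
//   compare of %1 against #0      ; SUBSWri writing wzr, sets NZCV
//   b.eq ...                      ; only Z is consumed
// becomes
//   %1 = SUBSWri %0, 1, 0         ; the subtraction now sets NZCV itself
//   b.eq ...
// and the explicit compare is erased.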
1520 
1521 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1522  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
1523  return false;
1524 
1525  MachineBasicBlock &MBB = *MI.getParent();
1526  DebugLoc DL = MI.getDebugLoc();
1527  unsigned Reg = MI.getOperand(0).getReg();
1528  const GlobalValue *GV =
1529  cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1530  const TargetMachine &TM = MBB.getParent()->getTarget();
1531  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1532  const unsigned char MO_NC = AArch64II::MO_NC;
1533 
1534  if ((OpFlags & AArch64II::MO_GOT) != 0) {
1535  BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1536  .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
1537  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1538  .addReg(Reg, RegState::Kill)
1539  .addImm(0)
1540  .addMemOperand(*MI.memoperands_begin());
1541  } else if (TM.getCodeModel() == CodeModel::Large) {
1542  BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1543  .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1544  .addImm(0);
1545  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1546  .addReg(Reg, RegState::Kill)
1547  .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1548  .addImm(16);
1549  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1550  .addReg(Reg, RegState::Kill)
1551  .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1552  .addImm(32);
1553  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1554  .addReg(Reg, RegState::Kill)
1555  .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1556  .addImm(48);
1557  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1558  .addReg(Reg, RegState::Kill)
1559  .addImm(0)
1560  .addMemOperand(*MI.memoperands_begin());
1561  } else {
1562  BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1563  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1564  unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1565  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1566  .addReg(Reg, RegState::Kill)
1567  .addGlobalAddress(GV, 0, LoFlags)
1568  .addMemOperand(*MI.memoperands_begin());
1569  }
1570 
1571  MBB.erase(MI);
1572 
1573  return true;
1574 }
1575 
1576 /// Return true if this instruction has a shifted-register operand with a
1576b /// non-zero immediate.
1577 bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
1578  switch (MI.getOpcode()) {
1579  default:
1580  break;
1581  case AArch64::ADDSWrs:
1582  case AArch64::ADDSXrs:
1583  case AArch64::ADDWrs:
1584  case AArch64::ADDXrs:
1585  case AArch64::ANDSWrs:
1586  case AArch64::ANDSXrs:
1587  case AArch64::ANDWrs:
1588  case AArch64::ANDXrs:
1589  case AArch64::BICSWrs:
1590  case AArch64::BICSXrs:
1591  case AArch64::BICWrs:
1592  case AArch64::BICXrs:
1593  case AArch64::EONWrs:
1594  case AArch64::EONXrs:
1595  case AArch64::EORWrs:
1596  case AArch64::EORXrs:
1597  case AArch64::ORNWrs:
1598  case AArch64::ORNXrs:
1599  case AArch64::ORRWrs:
1600  case AArch64::ORRXrs:
1601  case AArch64::SUBSWrs:
1602  case AArch64::SUBSXrs:
1603  case AArch64::SUBWrs:
1604  case AArch64::SUBXrs:
1605  if (MI.getOperand(3).isImm()) {
1606  unsigned val = MI.getOperand(3).getImm();
1607  return (val != 0);
1608  }
1609  break;
1610  }
1611  return false;
1612 }
1613 
1614 /// Return true if this instruction has an extended-register operand with a
1614b /// non-zero immediate.
1615 bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
1616  switch (MI.getOpcode()) {
1617  default:
1618  break;
1619  case AArch64::ADDSWrx:
1620  case AArch64::ADDSXrx:
1621  case AArch64::ADDSXrx64:
1622  case AArch64::ADDWrx:
1623  case AArch64::ADDXrx:
1624  case AArch64::ADDXrx64:
1625  case AArch64::SUBSWrx:
1626  case AArch64::SUBSXrx:
1627  case AArch64::SUBSXrx64:
1628  case AArch64::SUBWrx:
1629  case AArch64::SUBXrx:
1630  case AArch64::SUBXrx64:
1631  if (MI.getOperand(3).isImm()) {
1632  unsigned val = MI.getOperand(3).getImm();
1633  return (val != 0);
1634  }
1635  break;
1636  }
1637 
1638  return false;
1639 }
1640 
1641 // Return true if this instruction simply sets its single destination register
1642 // to zero. This is equivalent to a register rename of the zero-register.
1643 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
1644  switch (MI.getOpcode()) {
1645  default:
1646  break;
1647  case AArch64::MOVZWi:
1648  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1649  if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1650  assert(MI.getDesc().getNumOperands() == 3 &&
1651  MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1652  return true;
1653  }
1654  break;
1655  case AArch64::ANDWri: // and Rd, Rzr, #imm
1656  return MI.getOperand(1).getReg() == AArch64::WZR;
1657  case AArch64::ANDXri:
1658  return MI.getOperand(1).getReg() == AArch64::XZR;
1659  case TargetOpcode::COPY:
1660  return MI.getOperand(1).getReg() == AArch64::WZR;
1661  }
1662  return false;
1663 }
1664 
1665 // Return true if this instruction simply renames a general register without
1666 // modifying bits.
1667 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
1668  switch (MI.getOpcode()) {
1669  default:
1670  break;
1671  case TargetOpcode::COPY: {
1672  // GPR32 copies will be lowered to ORRXrs
1673  unsigned DstReg = MI.getOperand(0).getReg();
1674  return (AArch64::GPR32RegClass.contains(DstReg) ||
1675  AArch64::GPR64RegClass.contains(DstReg));
1676  }
1677  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1678  if (MI.getOperand(1).getReg() == AArch64::XZR) {
1679  assert(MI.getDesc().getNumOperands() == 4 &&
1680  MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1681  return true;
1682  }
1683  break;
1684  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1685  if (MI.getOperand(2).getImm() == 0) {
1686  assert(MI.getDesc().getNumOperands() == 4 &&
1687  MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1688  return true;
1689  }
1690  break;
1691  }
1692  return false;
1693 }
1694 
1695 // Return true if this instruction simply renames a floating-point register
1696 // without modifying bits.
1697 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
1698  switch (MI.getOpcode()) {
1699  default:
1700  break;
1701  case TargetOpcode::COPY: {
1702  // FPR64 copies will be lowered to ORR.16b
1703  unsigned DstReg = MI.getOperand(0).getReg();
1704  return (AArch64::FPR64RegClass.contains(DstReg) ||
1705  AArch64::FPR128RegClass.contains(DstReg));
1706  }
1707  case AArch64::ORRv16i8:
1708  if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1709  assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1710  "invalid ORRv16i8 operands");
1711  return true;
1712  }
1713  break;
1714  }
1715  return false;
1716 }
1717 
1718 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1719  int &FrameIndex) const {
1720  switch (MI.getOpcode()) {
1721  default:
1722  break;
1723  case AArch64::LDRWui:
1724  case AArch64::LDRXui:
1725  case AArch64::LDRBui:
1726  case AArch64::LDRHui:
1727  case AArch64::LDRSui:
1728  case AArch64::LDRDui:
1729  case AArch64::LDRQui:
1730  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1731  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1732  FrameIndex = MI.getOperand(1).getIndex();
1733  return MI.getOperand(0).getReg();
1734  }
1735  break;
1736  }
1737 
1738  return 0;
1739 }
1740 
1741 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1742  int &FrameIndex) const {
1743  switch (MI.getOpcode()) {
1744  default:
1745  break;
1746  case AArch64::STRWui:
1747  case AArch64::STRXui:
1748  case AArch64::STRBui:
1749  case AArch64::STRHui:
1750  case AArch64::STRSui:
1751  case AArch64::STRDui:
1752  case AArch64::STRQui:
1753  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1754  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1755  FrameIndex = MI.getOperand(1).getIndex();
1756  return MI.getOperand(0).getReg();
1757  }
1758  break;
1759  }
1760  return 0;
1761 }
1762 
1763 /// Return true if this load/store scales or extends its register offset.
1764 /// This refers to scaling a dynamic index as opposed to scaled immediates.
1765 /// MI should be a memory op that allows scaled addressing.
1766 bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
1767  switch (MI.getOpcode()) {
1768  default:
1769  break;
1770  case AArch64::LDRBBroW:
1771  case AArch64::LDRBroW:
1772  case AArch64::LDRDroW:
1773  case AArch64::LDRHHroW:
1774  case AArch64::LDRHroW:
1775  case AArch64::LDRQroW:
1776  case AArch64::LDRSBWroW:
1777  case AArch64::LDRSBXroW:
1778  case AArch64::LDRSHWroW:
1779  case AArch64::LDRSHXroW:
1780  case AArch64::LDRSWroW:
1781  case AArch64::LDRSroW:
1782  case AArch64::LDRWroW:
1783  case AArch64::LDRXroW:
1784  case AArch64::STRBBroW:
1785  case AArch64::STRBroW:
1786  case AArch64::STRDroW:
1787  case AArch64::STRHHroW:
1788  case AArch64::STRHroW:
1789  case AArch64::STRQroW:
1790  case AArch64::STRSroW:
1791  case AArch64::STRWroW:
1792  case AArch64::STRXroW:
1793  case AArch64::LDRBBroX:
1794  case AArch64::LDRBroX:
1795  case AArch64::LDRDroX:
1796  case AArch64::LDRHHroX:
1797  case AArch64::LDRHroX:
1798  case AArch64::LDRQroX:
1799  case AArch64::LDRSBWroX:
1800  case AArch64::LDRSBXroX:
1801  case AArch64::LDRSHWroX:
1802  case AArch64::LDRSHXroX:
1803  case AArch64::LDRSWroX:
1804  case AArch64::LDRSroX:
1805  case AArch64::LDRWroX:
1806  case AArch64::LDRXroX:
1807  case AArch64::STRBBroX:
1808  case AArch64::STRBroX:
1809  case AArch64::STRDroX:
1810  case AArch64::STRHHroX:
1811  case AArch64::STRHroX:
1812  case AArch64::STRQroX:
1813  case AArch64::STRSroX:
1814  case AArch64::STRWroX:
1815  case AArch64::STRXroX:
1816 
1817  unsigned Val = MI.getOperand(3).getImm();
1818  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1819  return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1820  }
1821  return false;
1822 }
1823 
1824 /// Check all MachineMemOperands for a hint to suppress pairing.
1825 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
1826  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1827  return MMO->getFlags() & MOSuppressPair;
1828  });
1829 }
1830 
1831 /// Set a flag on the first MachineMemOperand to suppress pairing.
1832 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
1833  if (MI.memoperands_empty())
1834  return;
1835  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1836 }
1837 
1838 /// Check all MachineMemOperands for a hint that the load/store is strided.
1839 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const {
1840  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1841  return MMO->getFlags() & MOStridedAccess;
1842  });
1843 }
1844 
1845 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
1846  switch (Opc) {
1847  default:
1848  return false;
1849  case AArch64::STURSi:
1850  case AArch64::STURDi:
1851  case AArch64::STURQi:
1852  case AArch64::STURBBi:
1853  case AArch64::STURHHi:
1854  case AArch64::STURWi:
1855  case AArch64::STURXi:
1856  case AArch64::LDURSi:
1857  case AArch64::LDURDi:
1858  case AArch64::LDURQi:
1859  case AArch64::LDURWi:
1860  case AArch64::LDURXi:
1861  case AArch64::LDURSWi:
1862  case AArch64::LDURHHi:
1863  case AArch64::LDURBBi:
1864  case AArch64::LDURSBWi:
1865  case AArch64::LDURSHWi:
1866  return true;
1867  }
1868 }
1869 
1870 bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
1871  return isUnscaledLdSt(MI.getOpcode());
1872 }
1873 
1874 // Is this a candidate for ld/st merging or pairing? For example, we don't
1875 // touch volatiles or load/stores that have a hint to avoid pair formation.
1876 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
1877  // If this is a volatile load/store, don't mess with it.
1878  if (MI.hasOrderedMemoryRef())
1879  return false;
1880 
1881  // Make sure this is a reg+imm (as opposed to an address reloc).
1882  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
1883  if (!MI.getOperand(2).isImm())
1884  return false;
1885 
1886  // Can't merge/pair if the instruction modifies the base register.
1887  // e.g., ldr x0, [x0]
1888  unsigned BaseReg = MI.getOperand(1).getReg();
1889  const TargetRegisterInfo *TRI = &getRegisterInfo();
1890  if (MI.modifiesRegister(BaseReg, TRI))
1891  return false;
1892 
1893  // Check if this load/store has a hint to avoid pair formation.
1894  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1895  if (isLdStPairSuppressed(MI))
1896  return false;
1897 
1898  // On some CPUs quad load/store pairs are slower than two single load/stores.
1899  if (Subtarget.isPaired128Slow()) {
1900  switch (MI.getOpcode()) {
1901  default:
1902  break;
1903  case AArch64::LDURQi:
1904  case AArch64::STURQi:
1905  case AArch64::LDRQui:
1906  case AArch64::STRQui:
1907  return false;
1908  }
1909  }
1910 
1911  return true;
1912 }
1913 
1914 bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
1915  MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
1916  const TargetRegisterInfo *TRI) const {
1917  unsigned Width;
1918  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
1919 }
1920 
1921 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
1922  MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
1923  const TargetRegisterInfo *TRI) const {
1924  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1925  // Handle only loads/stores with base register followed by immediate offset.
1926  if (LdSt.getNumExplicitOperands() == 3) {
1927  // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1928  if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
1929  return false;
1930  } else if (LdSt.getNumExplicitOperands() == 4) {
1931  // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1932  if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
1933  !LdSt.getOperand(3).isImm())
1934  return false;
1935  } else
1936  return false;
1937 
1938  // Get the scaling factor for the instruction and set the width for the
1939  // instruction.
1940  unsigned Scale = 0;
1941  int64_t Dummy1, Dummy2;
1942 
1943  // If this returns false, then it's an instruction we don't want to handle.
1944  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
1945  return false;
1946 
1947  // Compute the offset. Offset is calculated as the immediate operand
1948  // multiplied by the scaling factor. Unscaled instructions have scaling factor
1949  // set to 1.
1950  if (LdSt.getNumExplicitOperands() == 3) {
1951  BaseReg = LdSt.getOperand(1).getReg();
1952  Offset = LdSt.getOperand(2).getImm() * Scale;
1953  } else {
1954  assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1955  BaseReg = LdSt.getOperand(2).getReg();
1956  Offset = LdSt.getOperand(3).getImm() * Scale;
1957  }
1958  return true;
1959 }
1960 
1961 MachineOperand &AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(
1962  MachineInstr &LdSt) const {
1963  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1964  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
1965  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
1966  return OfsOp;
1967 }
1968 
1969 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
1970  unsigned &Width, int64_t &MinOffset,
1971  int64_t &MaxOffset) const {
1972  switch (Opcode) {
1973  // Not a memory operation or something we want to handle.
1974  default:
1975  Scale = Width = 0;
1976  MinOffset = MaxOffset = 0;
1977  return false;
1978  case AArch64::STRWpost:
1979  case AArch64::LDRWpost:
1980  Width = 32;
1981  Scale = 4;
1982  MinOffset = -256;
1983  MaxOffset = 255;
1984  break;
1985  case AArch64::LDURQi:
1986  case AArch64::STURQi:
1987  Width = 16;
1988  Scale = 1;
1989  MinOffset = -256;
1990  MaxOffset = 255;
1991  break;
1992  case AArch64::LDURXi:
1993  case AArch64::LDURDi:
1994  case AArch64::STURXi:
1995  case AArch64::STURDi:
1996  Width = 8;
1997  Scale = 1;
1998  MinOffset = -256;
1999  MaxOffset = 255;
2000  break;
2001  case AArch64::LDURWi:
2002  case AArch64::LDURSi:
2003  case AArch64::LDURSWi:
2004  case AArch64::STURWi:
2005  case AArch64::STURSi:
2006  Width = 4;
2007  Scale = 1;
2008  MinOffset = -256;
2009  MaxOffset = 255;
2010  break;
2011  case AArch64::LDURHi:
2012  case AArch64::LDURHHi:
2013  case AArch64::LDURSHXi:
2014  case AArch64::LDURSHWi:
2015  case AArch64::STURHi:
2016  case AArch64::STURHHi:
2017  Width = 2;
2018  Scale = 1;
2019  MinOffset = -256;
2020  MaxOffset = 255;
2021  break;
2022  case AArch64::LDURBi:
2023  case AArch64::LDURBBi:
2024  case AArch64::LDURSBXi:
2025  case AArch64::LDURSBWi:
2026  case AArch64::STURBi:
2027  case AArch64::STURBBi:
2028  Width = 1;
2029  Scale = 1;
2030  MinOffset = -256;
2031  MaxOffset = 255;
2032  break;
2033  case AArch64::LDPQi:
2034  case AArch64::LDNPQi:
2035  case AArch64::STPQi:
2036  case AArch64::STNPQi:
2037  Scale = 16;
2038  Width = 32;
2039  MinOffset = -64;
2040  MaxOffset = 63;
2041  break;
2042  case AArch64::LDRQui:
2043  case AArch64::STRQui:
2044  Scale = Width = 16;
2045  MinOffset = 0;
2046  MaxOffset = 4095;
2047  break;
2048  case AArch64::LDPXi:
2049  case AArch64::LDPDi:
2050  case AArch64::LDNPXi:
2051  case AArch64::LDNPDi:
2052  case AArch64::STPXi:
2053  case AArch64::STPDi:
2054  case AArch64::STNPXi:
2055  case AArch64::STNPDi:
2056  Scale = 8;
2057  Width = 16;
2058  MinOffset = -64;
2059  MaxOffset = 63;
2060  break;
2061  case AArch64::LDRXui:
2062  case AArch64::LDRDui:
2063  case AArch64::STRXui:
2064  case AArch64::STRDui:
2065  Scale = Width = 8;
2066  MinOffset = 0;
2067  MaxOffset = 4095;
2068  break;
2069  case AArch64::LDPWi:
2070  case AArch64::LDPSi:
2071  case AArch64::LDNPWi:
2072  case AArch64::LDNPSi:
2073  case AArch64::STPWi:
2074  case AArch64::STPSi:
2075  case AArch64::STNPWi:
2076  case AArch64::STNPSi:
2077  Scale = 4;
2078  Width = 8;
2079  MinOffset = -64;
2080  MaxOffset = 63;
2081  break;
2082  case AArch64::LDRWui:
2083  case AArch64::LDRSui:
2084  case AArch64::LDRSWui:
2085  case AArch64::STRWui:
2086  case AArch64::STRSui:
2087  Scale = Width = 4;
2088  MinOffset = 0;
2089  MaxOffset = 4095;
2090  break;
2091  case AArch64::LDRHui:
2092  case AArch64::LDRHHui:
2093  case AArch64::STRHui:
2094  case AArch64::STRHHui:
2095  Scale = Width = 2;
2096  MinOffset = 0;
2097  MaxOffset = 4095;
2098  break;
2099  case AArch64::LDRBui:
2100  case AArch64::LDRBBui:
2101  case AArch64::STRBui:
2102  case AArch64::STRBBui:
2103  Scale = Width = 1;
2104  MinOffset = 0;
2105  MaxOffset = 4095;
2106  break;
2107  }
2108 
2109  return true;
2110 }
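// Worked example (values taken from the switch above): getMemOpInfo for
// AArch64::LDPXi reports Scale = 8, Width = 16 and an immediate range of
// [-64, 63], i.e. encodable byte offsets from -512 to +504 in steps of 8.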
2111 
2112 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
2113 // scaled.
2114 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2115  unsigned OffsetStride = 1;
2116  switch (Opc) {
2117  default:
2118  return false;
2119  case AArch64::LDURQi:
2120  case AArch64::STURQi:
2121  OffsetStride = 16;
2122  break;
2123  case AArch64::LDURXi:
2124  case AArch64::LDURDi:
2125  case AArch64::STURXi:
2126  case AArch64::STURDi:
2127  OffsetStride = 8;
2128  break;
2129  case AArch64::LDURWi:
2130  case AArch64::LDURSi:
2131  case AArch64::LDURSWi:
2132  case AArch64::STURWi:
2133  case AArch64::STURSi:
2134  OffsetStride = 4;
2135  break;
2136  }
2137  // If the byte-offset isn't a multiple of the stride, we can't scale this
2138  // offset.
2139  if (Offset % OffsetStride != 0)
2140  return false;
2141 
2142  // Convert the byte-offset used by unscaled into an "element" offset used
2143  // by the scaled pair load/store instructions.
2144  Offset /= OffsetStride;
2145  return true;
2146 }
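// Worked example (hypothetical offsets): for AArch64::STURXi the stride is 8,
// so scaleOffset rewrites a byte offset of 24 to the element offset 3 and
// returns true, while a byte offset of 20 is rejected because it is not a
// multiple of 8.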
2147 
2148 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2149  if (FirstOpc == SecondOpc)
2150  return true;
2151  // We can also pair sign-ext and zero-ext instructions.
2152  switch (FirstOpc) {
2153  default:
2154  return false;
2155  case AArch64::LDRWui:
2156  case AArch64::LDURWi:
2157  return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2158  case AArch64::LDRSWui:
2159  case AArch64::LDURSWi:
2160  return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2161  }
2162  // These instructions can't be paired based on their opcodes.
2163  return false;
2164 }
2165 
2166 /// Detect opportunities for ldp/stp formation.
2167 ///
2168 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
2169 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
2170  unsigned BaseReg1,
2171  MachineInstr &SecondLdSt,
2172  unsigned BaseReg2,
2173  unsigned NumLoads) const {
2174  if (BaseReg1 != BaseReg2)
2175  return false;
2176 
2177  // Only cluster up to a single pair.
2178  if (NumLoads > 1)
2179  return false;
2180 
2181  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2182  return false;
2183 
2184  // Can we pair these instructions based on their opcodes?
2185  unsigned FirstOpc = FirstLdSt.getOpcode();
2186  unsigned SecondOpc = SecondLdSt.getOpcode();
2187  if (!canPairLdStOpc(FirstOpc, SecondOpc))
2188  return false;
2189 
2190  // Can't merge volatiles or load/stores that have a hint to avoid pair
2191  // formation, for example.
2192  if (!isCandidateToMergeOrPair(FirstLdSt) ||
2193  !isCandidateToMergeOrPair(SecondLdSt))
2194  return false;
2195 
2196  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2197  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2198  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2199  return false;
2200 
2201  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2202  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2203  return false;
2204 
2205  // Pairwise instructions have a 7-bit signed offset field.
2206  if (Offset1 > 63 || Offset1 < -64)
2207  return false;
2208 
2209  // The caller should already have ordered First/SecondLdSt by offset.
2210  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2211  return Offset1 + 1 == Offset2;
2212 }
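// Worked example (hypothetical instructions): two LDRXui loads from the same
// base register with immediates 4 and 5 (byte offsets 32 and 40) pass every
// check above and are clustered so a later pass can form a single LDP;
// immediates 4 and 6 fail the final Offset1 + 1 == Offset2 test.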
2213 
2214 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2215  unsigned Reg, unsigned SubIdx,
2216  unsigned State,
2217  const TargetRegisterInfo *TRI) {
2218  if (!SubIdx)
2219  return MIB.addReg(Reg, State);
2220 
2221  if (TargetRegisterInfo::isPhysicalRegister(Reg))
2222  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2223  return MIB.addReg(Reg, State, SubIdx);
2224 }
2225 
2226 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2227  unsigned NumRegs) {
2228  // We really want the positive remainder mod 32 here, that happens to be
2229  // easily obtainable with a mask.
2230  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2231 }
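// Worked example: copying the tuple D1_D2_D3 into D2_D3_D4 gives
// (DestReg - SrcReg) & 0x1f == 1, which is < NumRegs == 3, so a forward copy
// would overwrite D2/D3 before they are read; copyPhysRegTuple below therefore
// walks the sub-registers in reverse order for such overlapping tuples.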
2232 
2233 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2234  MachineBasicBlock::iterator I,
2235  const DebugLoc &DL, unsigned DestReg,
2236  unsigned SrcReg, bool KillSrc,
2237  unsigned Opcode,
2238  ArrayRef<unsigned> Indices) const {
2239  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2240  const TargetRegisterInfo *TRI = &getRegisterInfo();
2241  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2242  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2243  unsigned NumRegs = Indices.size();
2244 
2245  int SubReg = 0, End = NumRegs, Incr = 1;
2246  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2247  SubReg = NumRegs - 1;
2248  End = -1;
2249  Incr = -1;
2250  }
2251 
2252  for (; SubReg != End; SubReg += Incr) {
2253  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2254  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2255  AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2256  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2257  }
2258 }
2259 
2260 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2261  MachineBasicBlock::iterator I,
2262  const DebugLoc &DL, unsigned DestReg,
2263  unsigned SrcReg, bool KillSrc) const {
2264  if (AArch64::GPR32spRegClass.contains(DestReg) &&
2265  (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2266  const TargetRegisterInfo *TRI = &getRegisterInfo();
2267 
2268  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2269  // If either operand is WSP, expand to ADD #0.
2270  if (Subtarget.hasZeroCycleRegMove()) {
2271  // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2272  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2273  &AArch64::GPR64spRegClass);
2274  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2275  &AArch64::GPR64spRegClass);
2276  // This instruction is reading and writing X registers. This may upset
2277  // the register scavenger and machine verifier, so we need to indicate
2278  // that we are reading an undefined value from SrcRegX, but a proper
2279  // value from SrcReg.
2280  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2281  .addReg(SrcRegX, RegState::Undef)
2282  .addImm(0)
2283  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2284  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2285  } else {
2286  BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2287  .addReg(SrcReg, getKillRegState(KillSrc))
2288  .addImm(0)
2289  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2290  }
2291  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
2292  BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2293  .addImm(0)
2294  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2295  } else {
2296  if (Subtarget.hasZeroCycleRegMove()) {
2297  // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2298  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2299  &AArch64::GPR64spRegClass);
2300  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2301  &AArch64::GPR64spRegClass);
2302  // This instruction is reading and writing X registers. This may upset
2303  // the register scavenger and machine verifier, so we need to indicate
2304  // that we are reading an undefined value from SrcRegX, but a proper
2305  // value from SrcReg.
2306  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2307  .addReg(AArch64::XZR)
2308  .addReg(SrcRegX, RegState::Undef)
2309  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2310  } else {
2311  // Otherwise, expand to ORR WZR.
2312  BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2313  .addReg(AArch64::WZR)
2314  .addReg(SrcReg, getKillRegState(KillSrc));
2315  }
2316  }
2317  return;
2318  }
2319 
2320  if (AArch64::GPR64spRegClass.contains(DestReg) &&
2321  (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2322  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2323  // If either operand is SP, expand to ADD #0.
2324  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2325  .addReg(SrcReg, getKillRegState(KillSrc))
2326  .addImm(0)
2327  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2328  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
2329  BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2330  .addImm(0)
2331  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2332  } else {
2333  // Otherwise, expand to ORR XZR.
2334  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2335  .addReg(AArch64::XZR)
2336  .addReg(SrcReg, getKillRegState(KillSrc));
2337  }
2338  return;
2339  }
2340 
2341  // Copy a DDDD register quad by copying the individual sub-registers.
2342  if (AArch64::DDDDRegClass.contains(DestReg) &&
2343  AArch64::DDDDRegClass.contains(SrcReg)) {
2344  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2345  AArch64::dsub2, AArch64::dsub3};
2346  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2347  Indices);
2348  return;
2349  }
2350 
2351  // Copy a DDD register triple by copying the individual sub-registers.
2352  if (AArch64::DDDRegClass.contains(DestReg) &&
2353  AArch64::DDDRegClass.contains(SrcReg)) {
2354  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2355  AArch64::dsub2};
2356  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2357  Indices);
2358  return;
2359  }
2360 
2361  // Copy a DD register pair by copying the individual sub-registers.
2362  if (AArch64::DDRegClass.contains(DestReg) &&
2363  AArch64::DDRegClass.contains(SrcReg)) {
2364  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2365  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2366  Indices);
2367  return;
2368  }
2369 
2370  // Copy a QQQQ register quad by copying the individual sub-registers.
2371  if (AArch64::QQQQRegClass.contains(DestReg) &&
2372  AArch64::QQQQRegClass.contains(SrcReg)) {
2373  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2374  AArch64::qsub2, AArch64::qsub3};
2375  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2376  Indices);
2377  return;
2378  }
2379 
2380  // Copy a QQQ register triple by copying the individual sub-registers.
2381  if (AArch64::QQQRegClass.contains(DestReg) &&
2382  AArch64::QQQRegClass.contains(SrcReg)) {
2383  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2384  AArch64::qsub2};
2385  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2386  Indices);
2387  return;
2388  }
2389 
2390  // Copy a QQ register pair by copying the individual sub-registers.
2391  if (AArch64::QQRegClass.contains(DestReg) &&
2392  AArch64::QQRegClass.contains(SrcReg)) {
2393  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2394  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2395  Indices);
2396  return;
2397  }
2398 
2399  if (AArch64::FPR128RegClass.contains(DestReg) &&
2400  AArch64::FPR128RegClass.contains(SrcReg)) {
2401  if (Subtarget.hasNEON()) {
2402  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2403  .addReg(SrcReg)
2404  .addReg(SrcReg, getKillRegState(KillSrc));
2405  } else {
2406  BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2407  .addReg(AArch64::SP, RegState::Define)
2408  .addReg(SrcReg, getKillRegState(KillSrc))
2409  .addReg(AArch64::SP)
2410  .addImm(-16);
2411  BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2412  .addReg(AArch64::SP, RegState::Define)
2413  .addReg(DestReg, RegState::Define)
2414  .addReg(AArch64::SP)
2415  .addImm(16);
2416  }
2417  return;
2418  }
2419 
2420  if (AArch64::FPR64RegClass.contains(DestReg) &&
2421  AArch64::FPR64RegClass.contains(SrcReg)) {
2422  if (Subtarget.hasNEON()) {
2423  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2424  &AArch64::FPR128RegClass);
2425  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2426  &AArch64::FPR128RegClass);
2427  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2428  .addReg(SrcReg)
2429  .addReg(SrcReg, getKillRegState(KillSrc));
2430  } else {
2431  BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2432  .addReg(SrcReg, getKillRegState(KillSrc));
2433  }
2434  return;
2435  }
2436 
2437  if (AArch64::FPR32RegClass.contains(DestReg) &&
2438  AArch64::FPR32RegClass.contains(SrcReg)) {
2439  if (Subtarget.hasNEON()) {
2440  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2441  &AArch64::FPR128RegClass);
2442  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2443  &AArch64::FPR128RegClass);
2444  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2445  .addReg(SrcReg)
2446  .addReg(SrcReg, getKillRegState(KillSrc));
2447  } else {
2448  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2449  .addReg(SrcReg, getKillRegState(KillSrc));
2450  }
2451  return;
2452  }
2453 
2454  if (AArch64::FPR16RegClass.contains(DestReg) &&
2455  AArch64::FPR16RegClass.contains(SrcReg)) {
2456  if (Subtarget.hasNEON()) {
2457  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2458  &AArch64::FPR128RegClass);
2459  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2460  &AArch64::FPR128RegClass);
2461  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2462  .addReg(SrcReg)
2463  .addReg(SrcReg, getKillRegState(KillSrc));
2464  } else {
2465  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2466  &AArch64::FPR32RegClass);
2467  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2468  &AArch64::FPR32RegClass);
2469  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2470  .addReg(SrcReg, getKillRegState(KillSrc));
2471  }
2472  return;
2473  }
2474 
2475  if (AArch64::FPR8RegClass.contains(DestReg) &&
2476  AArch64::FPR8RegClass.contains(SrcReg)) {
2477  if (Subtarget.hasNEON()) {
2478  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2479  &AArch64::FPR128RegClass);
2480  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2481  &AArch64::FPR128RegClass);
2482  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2483  .addReg(SrcReg)
2484  .addReg(SrcReg, getKillRegState(KillSrc));
2485  } else {
2486  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2487  &AArch64::FPR32RegClass);
2488  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2489  &AArch64::FPR32RegClass);
2490  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2491  .addReg(SrcReg, getKillRegState(KillSrc));
2492  }
2493  return;
2494  }
2495 
2496  // Copies between GPR64 and FPR64.
2497  if (AArch64::FPR64RegClass.contains(DestReg) &&
2498  AArch64::GPR64RegClass.contains(SrcReg)) {
2499  BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2500  .addReg(SrcReg, getKillRegState(KillSrc));
2501  return;
2502  }
2503  if (AArch64::GPR64RegClass.contains(DestReg) &&
2504  AArch64::FPR64RegClass.contains(SrcReg)) {
2505  BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2506  .addReg(SrcReg, getKillRegState(KillSrc));
2507  return;
2508  }
2509  // Copies between GPR32 and FPR32.
2510  if (AArch64::FPR32RegClass.contains(DestReg) &&
2511  AArch64::GPR32RegClass.contains(SrcReg)) {
2512  BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2513  .addReg(SrcReg, getKillRegState(KillSrc));
2514  return;
2515  }
2516  if (AArch64::GPR32RegClass.contains(DestReg) &&
2517  AArch64::FPR32RegClass.contains(SrcReg)) {
2518  BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2519  .addReg(SrcReg, getKillRegState(KillSrc));
2520  return;
2521  }
2522 
2523  if (DestReg == AArch64::NZCV) {
2524  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2525  BuildMI(MBB, I, DL, get(AArch64::MSR))
2526  .addImm(AArch64SysReg::NZCV)
2527  .addReg(SrcReg, getKillRegState(KillSrc))
2528  .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2529  return;
2530  }
2531 
2532  if (SrcReg == AArch64::NZCV) {
2533  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2534  BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2535  .addImm(AArch64SysReg::NZCV)
2536  .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2537  return;
2538  }
2539 
2540  llvm_unreachable("unimplemented reg-to-reg copy");
2541 }
2542 
2543 void AArch64InstrInfo::storeRegToStackSlot(
2544  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2545  bool isKill, int FI, const TargetRegisterClass *RC,
2546  const TargetRegisterInfo *TRI) const {
2547  DebugLoc DL;
2548  if (MBBI != MBB.end())
2549  DL = MBBI->getDebugLoc();
2550  MachineFunction &MF = *MBB.getParent();
2551  MachineFrameInfo &MFI = MF.getFrameInfo();
2552  unsigned Align = MFI.getObjectAlignment(FI);
2553 
2554  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2555  MachineMemOperand *MMO = MF.getMachineMemOperand(
2556  PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2557  unsigned Opc = 0;
2558  bool Offset = true;
2559  switch (TRI->getSpillSize(*RC)) {
2560  case 1:
2561  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2562  Opc = AArch64::STRBui;
2563  break;
2564  case 2:
2565  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2566  Opc = AArch64::STRHui;
2567  break;
2568  case 4:
2569  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2570  Opc = AArch64::STRWui;
2571  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2572  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2573  else
2574  assert(SrcReg != AArch64::WSP);
2575  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2576  Opc = AArch64::STRSui;
2577  break;
2578  case 8:
2579  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2580  Opc = AArch64::STRXui;
2581  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2582  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2583  else
2584  assert(SrcReg != AArch64::SP);
2585  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2586  Opc = AArch64::STRDui;
2587  break;
2588  case 16:
2589  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2590  Opc = AArch64::STRQui;
2591  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2592  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2593  Opc = AArch64::ST1Twov1d;
2594  Offset = false;
2595  }
2596  break;
2597  case 24:
2598  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2599  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2600  Opc = AArch64::ST1Threev1d;
2601  Offset = false;
2602  }
2603  break;
2604  case 32:
2605  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2606  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2607  Opc = AArch64::ST1Fourv1d;
2608  Offset = false;
2609  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2610  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2611  Opc = AArch64::ST1Twov2d;
2612  Offset = false;
2613  }
2614  break;
2615  case 48:
2616  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2617  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2618  Opc = AArch64::ST1Threev2d;
2619  Offset = false;
2620  }
2621  break;
2622  case 64:
2623  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2624  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2625  Opc = AArch64::ST1Fourv2d;
2626  Offset = false;
2627  }
2628  break;
2629  }
2630  assert(Opc && "Unknown register class");
2631 
2632  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2633  .addReg(SrcReg, getKillRegState(isKill))
2634  .addFrameIndex(FI);
2635 
2636  if (Offset)
2637  MI.addImm(0);
2638  MI.addMemOperand(MMO);
2639 }
2640 
2641 void AArch64InstrInfo::loadRegFromStackSlot(
2642  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2643  int FI, const TargetRegisterClass *RC,
2644  const TargetRegisterInfo *TRI) const {
2645  DebugLoc DL;
2646  if (MBBI != MBB.end())
2647  DL = MBBI->getDebugLoc();
2648  MachineFunction &MF = *MBB.getParent();
2649  MachineFrameInfo &MFI = MF.getFrameInfo();
2650  unsigned Align = MFI.getObjectAlignment(FI);
2651  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2652  MachineMemOperand *MMO = MF.getMachineMemOperand(
2653  PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2654 
2655  unsigned Opc = 0;
2656  bool Offset = true;
2657  switch (TRI->getSpillSize(*RC)) {
2658  case 1:
2659  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2660  Opc = AArch64::LDRBui;
2661  break;
2662  case 2:
2663  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2664  Opc = AArch64::LDRHui;
2665  break;
2666  case 4:
2667  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2668  Opc = AArch64::LDRWui;
2669  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2670  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2671  else
2672  assert(DestReg != AArch64::WSP);
2673  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2674  Opc = AArch64::LDRSui;
2675  break;
2676  case 8:
2677  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2678  Opc = AArch64::LDRXui;
2679  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2680  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2681  else
2682  assert(DestReg != AArch64::SP);
2683  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2684  Opc = AArch64::LDRDui;
2685  break;
2686  case 16:
2687  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2688  Opc = AArch64::LDRQui;
2689  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2690  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2691  Opc = AArch64::LD1Twov1d;
2692  Offset = false;
2693  }
2694  break;
2695  case 24:
2696  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2697  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2698  Opc = AArch64::LD1Threev1d;
2699  Offset = false;
2700  }
2701  break;
2702  case 32:
2703  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2704  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2705  Opc = AArch64::LD1Fourv1d;
2706  Offset = false;
2707  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2708  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2709  Opc = AArch64::LD1Twov2d;
2710  Offset = false;
2711  }
2712  break;
2713  case 48:
2714  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2715  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2716  Opc = AArch64::LD1Threev2d;
2717  Offset = false;
2718  }
2719  break;
2720  case 64:
2721  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2722  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2723  Opc = AArch64::LD1Fourv2d;
2724  Offset = false;
2725  }
2726  break;
2727  }
2728  assert(Opc && "Unknown register class");
2729 
2730  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2731  .addReg(DestReg, getDefRegState(true))
2732  .addFrameIndex(FI);
2733  if (Offset)
2734  MI.addImm(0);
2735  MI.addMemOperand(MMO);
2736 }
2737 
2738 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2739  MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2740  unsigned DestReg, unsigned SrcReg, int Offset,
2741  const TargetInstrInfo *TII,
2742  MachineInstr::MIFlag Flag, bool SetNZCV) {
2743  if (DestReg == SrcReg && Offset == 0)
2744  return;
2745 
2746  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2747  "SP increment/decrement not 16-byte aligned");
2748 
2749  bool isSub = Offset < 0;
2750  if (isSub)
2751  Offset = -Offset;
2752 
2753  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2754  // scratch register. If DestReg is a virtual register, use it as the
2755  // scratch register; otherwise, create a new virtual register (to be
2756  // replaced by the scavenger at the end of PEI). That case can be optimized
2757  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2758  // register can be loaded with offset%8 and the add/sub can use an extending
2759  // instruction with LSL#3.
2760  // Currently the function handles any offsets but generates a poor sequence
2761  // of code.
2762  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2763 
2764  unsigned Opc;
2765  if (SetNZCV)
2766  Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2767  else
2768  Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2769  const unsigned MaxEncoding = 0xfff;
2770  const unsigned ShiftSize = 12;
2771  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2772  while (((unsigned)Offset) >= (1 << ShiftSize)) {
2773  unsigned ThisVal;
2774  if (((unsigned)Offset) > MaxEncodableValue) {
2775  ThisVal = MaxEncodableValue;
2776  } else {
2777  ThisVal = Offset & MaxEncodableValue;
2778  }
2779  assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2780  "Encoding cannot handle value that big");
2781  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2782  .addReg(SrcReg)
2783  .addImm(ThisVal >> ShiftSize)
2784  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2785  .setMIFlag(Flag);
2786 
2787  SrcReg = DestReg;
2788  Offset -= ThisVal;
2789  if (Offset == 0)
2790  return;
2791  }
2792  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2793  .addReg(SrcReg)
2794  .addImm(Offset)
2795  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2796  .setMIFlag(Flag);
2797 }
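// Worked example (hypothetical offset): emitFrameOffset with SP as both source
// and destination and Offset = 0x5010 first emits "add sp, sp, #5, lsl #12" for
// the 0x5000 part, then "add sp, sp, #16" for the remainder, since each ADD/SUB
// immediate carries at most 12 bits (optionally shifted left by 12).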
2798 
2799 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2800  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
2801  MachineBasicBlock::iterator InsertPt, int FrameIndex,
2802  LiveIntervals *LIS) const {
2803  // This is a bit of a hack. Consider this instruction:
2804  //
2805  // %0 = COPY %sp; GPR64all:%0
2806  //
2807  // We explicitly chose GPR64all for the virtual register so such a copy might
2808  // be eliminated by RegisterCoalescer. However, that may not be possible, and
2809  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
2810  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2811  //
2812  // To prevent that, we are going to constrain the %0 register class here.
2813  //
2814  // <rdar://problem/11522048>
2815  //
2816  if (MI.isFullCopy()) {
2817  unsigned DstReg = MI.getOperand(0).getReg();
2818  unsigned SrcReg = MI.getOperand(1).getReg();
2819  if (SrcReg == AArch64::SP &&
2820  TargetRegisterInfo::isVirtualRegister(DstReg)) {
2821  MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2822  return nullptr;
2823  }
2824  if (DstReg == AArch64::SP &&
2825  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2826  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2827  return nullptr;
2828  }
2829  }
2830 
2831  // Handle the case where a copy is being spilled or filled but the source
2832  // and destination register class don't match. For example:
2833  //
2834  // %0 = COPY %xzr; GPR64common:%0
2835  //
2836  // In this case we can still safely fold away the COPY and generate the
2837  // following spill code:
2838  //
2839  // STRXui %xzr, %stack.0
2840  //
2841  // This also eliminates spilled cross register class COPYs (e.g. between x and
2842  // d regs) of the same size. For example:
2843  //
2844  // %0 = COPY %1; GPR64:%0, FPR64:%1
2845  //
2846  // will be filled as
2847  //
2848  // LDRDui %0, fi<#0>
2849  //
2850  // instead of
2851  //
2852  // LDRXui %Temp, fi<#0>
2853  // %0 = FMOV %Temp
2854  //
2855  if (MI.isCopy() && Ops.size() == 1 &&
2856  // Make sure we're only folding the explicit COPY defs/uses.
2857  (Ops[0] == 0 || Ops[0] == 1)) {
2858  bool IsSpill = Ops[0] == 0;
2859  bool IsFill = !IsSpill;
2860  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
2861  const MachineRegisterInfo &MRI = MF.getRegInfo();
2862  MachineBasicBlock &MBB = *MI.getParent();
2863  const MachineOperand &DstMO = MI.getOperand(0);
2864  const MachineOperand &SrcMO = MI.getOperand(1);
2865  unsigned DstReg = DstMO.getReg();
2866  unsigned SrcReg = SrcMO.getReg();
2867  // This is slightly expensive to compute for physical regs since
2868  // getMinimalPhysRegClass is slow.
2869  auto getRegClass = [&](unsigned Reg) {
2870  return TargetRegisterInfo::isVirtualRegister(Reg)
2871  ? MRI.getRegClass(Reg)
2872  : TRI.getMinimalPhysRegClass(Reg);
2873  };
2874 
2875  if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
2876  assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
2877  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
2878  "Mismatched register size in non subreg COPY");
2879  if (IsSpill)
2880  storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
2881  getRegClass(SrcReg), &TRI);
2882  else
2883  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
2884  getRegClass(DstReg), &TRI);
2885  return &*--InsertPt;
2886  }
2887 
2888  // Handle cases like spilling def of:
2889  //
2890  // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
2891  //
2892  // where the physical register source can be widened and stored to the full
2893  // virtual reg destination stack slot, in this case producing:
2894  //
2895  // STRXui %xzr, %stack.0
2896  //
2897  if (IsSpill && DstMO.isUndef() &&
2898  TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2899  assert(SrcMO.getSubReg() == 0 &&
2900  "Unexpected subreg on physical register");
2901  const TargetRegisterClass *SpillRC;
2902  unsigned SpillSubreg;
2903  switch (DstMO.getSubReg()) {
2904  default:
2905  SpillRC = nullptr;
2906  break;
2907  case AArch64::sub_32:
2908  case AArch64::ssub:
2909  if (AArch64::GPR32RegClass.contains(SrcReg)) {
2910  SpillRC = &AArch64::GPR64RegClass;
2911  SpillSubreg = AArch64::sub_32;
2912  } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
2913  SpillRC = &AArch64::FPR64RegClass;
2914  SpillSubreg = AArch64::ssub;
2915  } else
2916  SpillRC = nullptr;
2917  break;
2918  case AArch64::dsub:
2919  if (AArch64::FPR64RegClass.contains(SrcReg)) {
2920  SpillRC = &AArch64::FPR128RegClass;
2921  SpillSubreg = AArch64::dsub;
2922  } else
2923  SpillRC = nullptr;
2924  break;
2925  }
2926 
2927  if (SpillRC)
2928  if (unsigned WidenedSrcReg =
2929  TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
2930  storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
2931  FrameIndex, SpillRC, &TRI);
2932  return &*--InsertPt;
2933  }
2934  }
2935 
2936  // Handle cases like filling use of:
2937  //
2938  // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
2939  //
2940  // where we can load the full virtual reg source stack slot, into the subreg
2941  // destination, in this case producing:
2942  //
2943  // LDRWui %0:sub_32<def,read-undef>, %stack.0
2944  //
2945  if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
2946  const TargetRegisterClass *FillRC;
2947  switch (DstMO.getSubReg()) {
2948  default:
2949  FillRC = nullptr;
2950  break;
2951  case AArch64::sub_32:
2952  FillRC = &AArch64::GPR32RegClass;
2953  break;
2954  case AArch64::ssub:
2955  FillRC = &AArch64::FPR32RegClass;
2956  break;
2957  case AArch64::dsub:
2958  FillRC = &AArch64::FPR64RegClass;
2959  break;
2960  }
2961 
2962  if (FillRC) {
2963  assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
2964  TRI.getRegSizeInBits(*FillRC) &&
2965  "Mismatched regclass size on folded subreg COPY");
2966  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
2967  MachineInstr &LoadMI = *--InsertPt;
2968  MachineOperand &LoadDst = LoadMI.getOperand(0);
2969  assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
2970  LoadDst.setSubReg(DstMO.getSubReg());
2971  LoadDst.setIsUndef();
2972  return &LoadMI;
2973  }
2974  }
2975  }
2976 
2977  // Cannot fold.
2978  return nullptr;
2979 }
2980 
2981 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2982  bool *OutUseUnscaledOp,
2983  unsigned *OutUnscaledOp,
2984  int *EmittableOffset) {
2985  int Scale = 1;
2986  bool IsSigned = false;
2987  // The ImmIdx should be changed case by case if it is not 2.
2988  unsigned ImmIdx = 2;
2989  unsigned UnscaledOp = 0;
2990  // Set output values in case of early exit.
2991  if (EmittableOffset)
2992  *EmittableOffset = 0;
2993  if (OutUseUnscaledOp)
2994  *OutUseUnscaledOp = false;
2995  if (OutUnscaledOp)
2996  *OutUnscaledOp = 0;
2997  switch (MI.getOpcode()) {
2998  default:
2999  llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
3000  // Vector spills/fills can't take an immediate offset.
3001  case AArch64::LD1Twov2d:
3002  case AArch64::LD1Threev2d:
3003  case AArch64::LD1Fourv2d:
3004  case AArch64::LD1Twov1d:
3005  case AArch64::LD1Threev1d:
3006  case AArch64::LD1Fourv1d:
3007  case AArch64::ST1Twov2d:
3008  case AArch64::ST1Threev2d:
3009  case AArch64::ST1Fourv2d:
3010  case AArch64::ST1Twov1d:
3011  case AArch64::ST1Threev1d:
3012  case AArch64::ST1Fourv1d:
3013  return AArch64FrameOffsetCannotUpdate;
3014  case AArch64::PRFMui:
3015  Scale = 8;
3016  UnscaledOp = AArch64::PRFUMi;
3017  break;
3018  case AArch64::LDRXui:
3019  Scale = 8;
3020  UnscaledOp = AArch64::LDURXi;
3021  break;
3022  case AArch64::LDRWui:
3023  Scale = 4;
3024  UnscaledOp = AArch64::LDURWi;
3025  break;
3026  case AArch64::LDRBui:
3027  Scale = 1;
3028  UnscaledOp = AArch64::LDURBi;
3029  break;
3030  case AArch64::LDRHui:
3031  Scale = 2;
3032  UnscaledOp = AArch64::LDURHi;
3033  break;
3034  case AArch64::LDRSui:
3035  Scale = 4;
3036  UnscaledOp = AArch64::LDURSi;
3037  break;
3038  case AArch64::LDRDui:
3039  Scale = 8;
3040  UnscaledOp = AArch64::LDURDi;
3041  break;
3042  case AArch64::LDRQui:
3043  Scale = 16;
3044  UnscaledOp = AArch64::LDURQi;
3045  break;
3046  case AArch64::LDRBBui:
3047  Scale = 1;
3048  UnscaledOp = AArch64::LDURBBi;
3049  break;
3050  case AArch64::LDRHHui:
3051  Scale = 2;
3052  UnscaledOp = AArch64::LDURHHi;
3053  break;
3054  case AArch64::LDRSBXui:
3055  Scale = 1;
3056  UnscaledOp = AArch64::LDURSBXi;
3057  break;
3058  case AArch64::LDRSBWui:
3059  Scale = 1;
3060  UnscaledOp = AArch64::LDURSBWi;
3061  break;
3062  case AArch64::LDRSHXui:
3063  Scale = 2;
3064  UnscaledOp = AArch64::LDURSHXi;
3065  break;
3066  case AArch64::LDRSHWui:
3067  Scale = 2;
3068  UnscaledOp = AArch64::LDURSHWi;
3069  break;
3070  case AArch64::LDRSWui:
3071  Scale = 4;
3072  UnscaledOp = AArch64::LDURSWi;
3073  break;
3074 
3075  case AArch64::STRXui:
3076  Scale = 8;
3077  UnscaledOp = AArch64::STURXi;
3078  break;
3079  case AArch64::STRWui:
3080  Scale = 4;
3081  UnscaledOp = AArch64::STURWi;
3082  break;
3083  case AArch64::STRBui:
3084  Scale = 1;
3085  UnscaledOp = AArch64::STURBi;
3086  break;
3087  case AArch64::STRHui:
3088  Scale = 2;
3089  UnscaledOp = AArch64::STURHi;
3090  break;
3091  case AArch64::STRSui:
3092  Scale = 4;
3093  UnscaledOp = AArch64::STURSi;
3094  break;
3095  case AArch64::STRDui:
3096  Scale = 8;
3097  UnscaledOp = AArch64::STURDi;
3098  break;
3099  case AArch64::STRQui:
3100  Scale = 16;
3101  UnscaledOp = AArch64::STURQi;
3102  break;
3103  case AArch64::STRBBui:
3104  Scale = 1;
3105  UnscaledOp = AArch64::STURBBi;
3106  break;
3107  case AArch64::STRHHui:
3108  Scale = 2;
3109  UnscaledOp = AArch64::STURHHi;
3110  break;
3111 
3112  case AArch64::LDPXi:
3113  case AArch64::LDPDi:
3114  case AArch64::STPXi:
3115  case AArch64::STPDi:
3116  case AArch64::LDNPXi:
3117  case AArch64::LDNPDi:
3118  case AArch64::STNPXi:
3119  case AArch64::STNPDi:
3120  ImmIdx = 3;
3121  IsSigned = true;
3122  Scale = 8;
3123  break;
3124  case AArch64::LDPQi:
3125  case AArch64::STPQi:
3126  case AArch64::LDNPQi:
3127  case AArch64::STNPQi:
3128  ImmIdx = 3;
3129  IsSigned = true;
3130  Scale = 16;
3131  break;
3132  case AArch64::LDPWi:
3133  case AArch64::LDPSi:
3134  case AArch64::STPWi:
3135  case AArch64::STPSi:
3136  case AArch64::LDNPWi:
3137  case AArch64::LDNPSi:
3138  case AArch64::STNPWi:
3139  case AArch64::STNPSi:
3140  ImmIdx = 3;
3141  IsSigned = true;
3142  Scale = 4;
3143  break;
3144 
3145  case AArch64::LDURXi:
3146  case AArch64::LDURWi:
3147  case AArch64::LDURBi:
3148  case AArch64::LDURHi:
3149  case AArch64::LDURSi:
3150  case AArch64::LDURDi:
3151  case AArch64::LDURQi:
3152  case AArch64::LDURHHi:
3153  case AArch64::LDURBBi:
3154  case AArch64::LDURSBXi:
3155  case AArch64::LDURSBWi:
3156  case AArch64::LDURSHXi:
3157  case AArch64::LDURSHWi:
3158  case AArch64::LDURSWi:
3159  case AArch64::STURXi:
3160  case AArch64::STURWi:
3161  case AArch64::STURBi:
3162  case AArch64::STURHi:
3163  case AArch64::STURSi:
3164  case AArch64::STURDi:
3165  case AArch64::STURQi:
3166  case AArch64::STURBBi:
3167  case AArch64::STURHHi:
3168  Scale = 1;
3169  break;
3170  }
3171 
3172  Offset += MI.getOperand(ImmIdx).getImm() * Scale;
3173 
3174  bool useUnscaledOp = false;
3175  // If the offset doesn't match the scale, we rewrite the instruction to
3176  // use the unscaled instruction instead. Likewise, if we have a negative
3177  // offset (and have an unscaled op to use).
3178  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
3179  useUnscaledOp = true;
3180 
3181  // Use an unscaled addressing mode if the instruction has a negative offset
3182  // (or if the instruction is already using an unscaled addressing mode).
3183  unsigned MaskBits;
3184  if (IsSigned) {
3185  // ldp/stp instructions.
3186  MaskBits = 7;
3187  Offset /= Scale;
3188  } else if (UnscaledOp == 0 || useUnscaledOp) {
3189  MaskBits = 9;
3190  IsSigned = true;
3191  Scale = 1;
3192  } else {
3193  MaskBits = 12;
3194  IsSigned = false;
3195  Offset /= Scale;
3196  }
3197 
3198  // Attempt to fold address computation.
3199  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
3200  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
3201  if (Offset >= MinOff && Offset <= MaxOff) {
3202  if (EmittableOffset)
3203  *EmittableOffset = Offset;
3204  Offset = 0;
3205  } else {
3206  int NewOff = Offset < 0 ? MinOff : MaxOff;
3207  if (EmittableOffset)
3208  *EmittableOffset = NewOff;
3209  Offset = (Offset - NewOff) * Scale;
3210  }
3211  if (OutUseUnscaledOp)
3212  *OutUseUnscaledOp = useUnscaledOp;
3213  if (OutUnscaledOp)
3214  *OutUnscaledOp = UnscaledOp;
3215  return AArch64FrameOffsetCanUpdate |
3216  (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3217 }
3218 
3219 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3220  unsigned FrameReg, int &Offset,
3221  const AArch64InstrInfo *TII) {
3222  unsigned Opcode = MI.getOpcode();
3223  unsigned ImmIdx = FrameRegIdx + 1;
3224 
3225  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3226  Offset += MI.getOperand(ImmIdx).getImm();
3227  emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3228  MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3229  MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3230  MI.eraseFromParent();
3231  Offset = 0;
3232  return true;
3233  }
3234 
3235  int NewOffset;
3236  unsigned UnscaledOp;
3237  bool UseUnscaledOp;
3238  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3239  &UnscaledOp, &NewOffset);
3240  if (Status & AArch64FrameOffsetCanUpdate) {
3241  if (Status & AArch64FrameOffsetIsLegal)
3242  // Replace the FrameIndex with FrameReg.
3243  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3244  if (UseUnscaledOp)
3245  MI.setDesc(TII->get(UnscaledOp));
3246 
3247  MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3248  return Offset == 0;
3249  }
3250 
3251  return false;
3252 }
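// Worked example (hypothetical operands): for "STRXui %x0, %stack.0, 2" where
// the frame index resolves to FrameReg plus a byte offset of 8, the rewrite
// keeps the scaled opcode and produces "STRXui %x0, FrameReg, 3", because the
// extra 8 bytes are exactly one 8-byte element of the scaled immediate.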
3253 
3254 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3255  NopInst.setOpcode(AArch64::HINT);
3256  NopInst.addOperand(MCOperand::createImm(0));
3257 }
3258 
3259 // AArch64 supports MachineCombiner.
3260 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3261 
3262 // True when Opc sets flags
3263 static bool isCombineInstrSettingFlag(unsigned Opc) {
3264  switch (Opc) {
3265  case AArch64::ADDSWrr:
3266  case AArch64::ADDSWri:
3267  case AArch64::ADDSXrr:
3268  case AArch64::ADDSXri:
3269  case AArch64::SUBSWrr:
3270  case AArch64::SUBSXrr:
3271  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3272  case AArch64::SUBSWri:
3273  case AArch64::SUBSXri:
3274  return true;
3275  default:
3276  break;
3277  }
3278  return false;
3279 }
3280 
3281 // 32b Opcodes that can be combined with a MUL
3282 static bool isCombineInstrCandidate32(unsigned Opc) {
3283  switch (Opc) {
3284  case AArch64::ADDWrr:
3285  case AArch64::ADDWri:
3286  case AArch64::SUBWrr:
3287  case AArch64::ADDSWrr:
3288  case AArch64::ADDSWri:
3289  case AArch64::SUBSWrr:
3290  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3291  case AArch64::SUBWri:
3292  case AArch64::SUBSWri:
3293  return true;
3294  default:
3295  break;
3296  }
3297  return false;
3298 }
3299 
3300 // 64b Opcodes that can be combined with a MUL
3301 static bool isCombineInstrCandidate64(unsigned Opc) {
3302  switch (Opc) {
3303  case AArch64::ADDXrr:
3304  case AArch64::ADDXri:
3305  case AArch64::SUBXrr:
3306  case AArch64::ADDSXrr:
3307  case AArch64::ADDSXri:
3308  case AArch64::SUBSXrr:
3309  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3310  case AArch64::SUBXri:
3311  case AArch64::SUBSXri:
3312  return true;
3313  default:
3314  break;
3315  }
3316  return false;
3317 }
3318 
3319 // FP Opcodes that can be combined with a FMUL
3320 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3321  switch (Inst.getOpcode()) {
3322  default:
3323  break;
3324  case AArch64::FADDSrr:
3325  case AArch64::FADDDrr:
3326  case AArch64::FADDv2f32:
3327  case AArch64::FADDv2f64:
3328  case AArch64::FADDv4f32:
3329  case AArch64::FSUBSrr:
3330  case AArch64::FSUBDrr:
3331  case AArch64::FSUBv2f32:
3332  case AArch64::FSUBv2f64:
3333  case AArch64::FSUBv4f32:
3334  TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3335  return (Options.UnsafeFPMath ||
3336  Options.AllowFPOpFusion == FPOpFusion::Fast);
3337  }
3338  return false;
3339 }
3340 
3341 // Opcodes that can be combined with a MUL
3342 static bool isCombineInstrCandidate(unsigned Opc) {
3343  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3344 }
3345 
3346 //
3347 // Utility routine that checks if \param MO is defined by an
3348 // \param CombineOpc instruction in the basic block \param MBB
3349 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3350  unsigned CombineOpc, unsigned ZeroReg = 0,
3351  bool CheckZeroReg = false) {
3352  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3353  MachineInstr *MI = nullptr;
3354 
3355  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3356  MI = MRI.getUniqueVRegDef(MO.getReg());
3357  // And it needs to be in the trace (otherwise, it won't have a depth).
3358  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3359  return false;
3360  // Must only be used by the user we combine with.
3361  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3362  return false;
3363 
3364  if (CheckZeroReg) {
3365  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3366  MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3367  MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
3368  // The third input reg must be zero.
3369  if (MI->getOperand(3).getReg() != ZeroReg)
3370  return false;
3371  }
3372 
3373  return true;
3374 }
3375 
3376 //
3377 // Is \param MO defined by an integer multiply and can be combined?
3378 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3379  unsigned MulOpc, unsigned ZeroReg) {
3380  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3381 }
3382 
3383 //
3384 // Is \param MO defined by a floating-point multiply and can be combined?
3385 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3386  unsigned MulOpc) {
3387  return canCombine(MBB, MO, MulOpc);
3388 }
3389 
3390 // TODO: There are many more machine instruction opcodes to match:
3391 // 1. Other data types (integer, vectors)
3392 // 2. Other math / logic operations (xor, or)
3393 // 3. Other forms of the same operation (intrinsics and other variants)
3394 bool AArch64InstrInfo::isAssociativeAndCommutative(
3395  const MachineInstr &Inst) const {
3396  switch (Inst.getOpcode()) {
3397  case AArch64::FADDDrr:
3398  case AArch64::FADDSrr:
3399  case AArch64::FADDv2f32:
3400  case AArch64::FADDv2f64:
3401  case AArch64::FADDv4f32:
3402  case AArch64::FMULDrr:
3403  case AArch64::FMULSrr:
3404  case AArch64::FMULX32:
3405  case AArch64::FMULX64:
3406  case AArch64::FMULXv2f32:
3407  case AArch64::FMULXv2f64:
3408  case AArch64::FMULXv4f32:
3409  case AArch64::FMULv2f32:
3410  case AArch64::FMULv2f64:
3411  case AArch64::FMULv4f32:
3412  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3413  default:
3414  return false;
3415  }
3416 }
3417 
3418 /// Find instructions that can be turned into madd.
3419 static bool getMaddPatterns(MachineInstr &Root,
3420  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3421  unsigned Opc = Root.getOpcode();
3422  MachineBasicBlock &MBB = *Root.getParent();
3423  bool Found = false;
3424 
3425  if (!isCombineInstrCandidate(Opc))
3426  return false;
3427  if (isCombineInstrSettingFlag(Opc)) {
3428  int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3429  // When NZCV is live bail out.
3430  if (Cmp_NZCV == -1)
3431  return false;
3432  unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3433  // When opcode can't change bail out.
3434  // CHECKME: do we miss any cases for opcode conversion?
3435  if (NewOpc == Opc)
3436  return false;
3437  Opc = NewOpc;
3438  }
3439 
3440  switch (Opc) {
3441  default:
3442  break;
3443  case AArch64::ADDWrr:
3444  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3445  "ADDWrr does not have register operands");
3446  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3447  AArch64::WZR)) {
3448  Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
3449  Found = true;
3450  }
3451  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3452  AArch64::WZR)) {
3453  Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
3454  Found = true;
3455  }
3456  break;
3457  case AArch64::ADDXrr:
3458  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3459  AArch64::XZR)) {
3460  Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
3461  Found = true;
3462  }
3463  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3464  AArch64::XZR)) {
3465  Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
3466  Found = true;
3467  }
3468  break;
3469  case AArch64::SUBWrr:
3470  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3471  AArch64::WZR)) {
3472  Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
3473  Found = true;
3474  }
3475  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3476  AArch64::WZR)) {
3477  Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
3478  Found = true;
3479  }
3480  break;
3481  case AArch64::SUBXrr:
3482  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3483  AArch64::XZR)) {
3484  Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
3485  Found = true;
3486  }
3487  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3488  AArch64::XZR)) {
3489  Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
3490  Found = true;
3491  }
3492  break;
3493  case AArch64::ADDWri:
3494  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3495  AArch64::WZR)) {
3496  Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
3497  Found = true;
3498  }
3499  break;
3500  case AArch64::ADDXri:
3501  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3502  AArch64::XZR)) {
3503  Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3504  Found = true;
3505  }
3506  break;
3507  case AArch64::SUBWri:
3508  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3509  AArch64::WZR)) {
3510  Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3511  Found = true;
3512  }
3513  break;
3514  case AArch64::SUBXri:
3515  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3516  AArch64::XZR)) {
3517  Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3518  Found = true;
3519  }
3520  break;
3521  }
3522  return Found;
3523 }
3524 /// Floating-Point Support
3525 
3526 /// Find instructions that can be turned into madd.
3527 static bool getFMAPatterns(MachineInstr &Root,
3528  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3529 
3530  if (!isCombineInstrCandidateFP(Root))
3531  return false;
3532 
3533  MachineBasicBlock &MBB = *Root.getParent();
3534  bool Found = false;
3535 
3536  switch (Root.getOpcode()) {
3537  default:
3538  assert(false && "Unsupported FP instruction in combiner\n");
3539  break;
3540  case AArch64::FADDSrr:
3541  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3542  "FADDWrr does not have register operands");
3543  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3545  Found = true;
3546  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3547  AArch64::FMULv1i32_indexed)) {
3549  Found = true;
3550  }
3551  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3553  Found = true;
3554  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3555  AArch64::FMULv1i32_indexed)) {
3557  Found = true;
3558  }
3559  break;
3560  case AArch64::FADDDrr:
3561  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3563  Found = true;
3564  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3565  AArch64::FMULv1i64_indexed)) {
3567  Found = true;
3568  }
3569  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3571  Found = true;
3572  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3573  AArch64::FMULv1i64_indexed)) {
3575  Found = true;
3576  }
3577  break;
3578  case AArch64::FADDv2f32:
3579  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3580  AArch64::FMULv2i32_indexed)) {
3582  Found = true;
3583  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3584  AArch64::FMULv2f32)) {
3586  Found = true;
3587  }
3588  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3589  AArch64::FMULv2i32_indexed)) {
3591  Found = true;
3592  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3593  AArch64::FMULv2f32)) {
3595  Found = true;
3596  }
3597  break;
3598  case AArch64::FADDv2f64:
3599  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3600  AArch64::FMULv2i64_indexed)) {
3602  Found = true;
3603  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3604  AArch64::FMULv2f64)) {
3606  Found = true;
3607  }
3608  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3609  AArch64::FMULv2i64_indexed)) {
3611  Found = true;
3612  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3613  AArch64::FMULv2f64)) {
3615  Found = true;
3616  }
3617  break;
3618  case AArch64::FADDv4f32:
3619  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3620  AArch64::FMULv4i32_indexed)) {
3622  Found = true;
3623  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3624  AArch64::FMULv4f32)) {
3626  Found = true;
3627  }
3628  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3629  AArch64::FMULv4i32_indexed)) {
3631  Found = true;
3632  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3633  AArch64::FMULv4f32)) {
3635  Found = true;
3636  }
3637  break;
3638 
3639  case AArch64::FSUBSrr:
3640  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3642  Found = true;
3643  }
3644  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3646  Found = true;
3647  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3648  AArch64::FMULv1i32_indexed)) {
3650  Found = true;
3651  }
3652  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3654  Found = true;
3655  }
3656  break;
3657  case AArch64::FSUBDrr:
3658  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3660  Found = true;
3661  }
3662  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3664  Found = true;
3665  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3666  AArch64::FMULv1i64_indexed)) {
3668  Found = true;
3669  }
3670  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3672  Found = true;
3673  }
3674  break;
3675  case AArch64::FSUBv2f32:
3676  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3677  AArch64::FMULv2i32_indexed)) {
3679  Found = true;
3680  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3681  AArch64::FMULv2f32)) {
3683  Found = true;
3684  }
3685  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3686  AArch64::FMULv2i32_indexed)) {
3688  Found = true;
3689  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3690  AArch64::FMULv2f32)) {
3692  Found = true;
3693  }
3694  break;
3695  case AArch64::FSUBv2f64:
3696  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3697  AArch64::FMULv2i64_indexed)) {
3699  Found = true;
3700  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3701  AArch64::FMULv2f64)) {
3703  Found = true;
3704  }
3705  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3706  AArch64::FMULv2i64_indexed)) {
3708  Found = true;
3709  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3710  AArch64::FMULv2f64)) {
3712  Found = true;
3713  }
3714  break;
3715  case AArch64::FSUBv4f32:
3716  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3717  AArch64::FMULv4i32_indexed)) {
3719  Found = true;
3720  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3721  AArch64::FMULv4f32)) {
3723  Found = true;
3724  }
3725  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3726  AArch64::FMULv4i32_indexed)) {
3728  Found = true;
3729  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3730  AArch64::FMULv4f32)) {
3732  Found = true;
3733  }
3734  break;
3735  }
3736  return Found;
3737 }
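// (Editorial sketch, not part of the original file.) The canCombineWithFMUL
// checks above all reduce to one question: is this operand defined, in the
// same block, by a single-use FMUL of the expected opcode? A simplified,
// hypothetical version of that predicate could look like this; the helper
// name is invented for illustration only.
static bool LLVM_ATTRIBUTE_UNUSED
producedBySingleUseFMul(MachineBasicBlock &MBB, const MachineOperand &MO,
                        unsigned MulOpc) {
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    return false;
  MachineInstr *Def = MRI.getUniqueVRegDef(MO.getReg());
  // The multiply must live in the same block, be the operand's unique
  // definition, and have no other (non-debug) users.
  return Def && Def->getParent() == &MBB && Def->getOpcode() == MulOpc &&
         MRI.hasOneNonDBGUse(MO.getReg());
}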
3738 
3739 /// Return true when a code sequence can improve throughput. It
3740 /// should be called only for instructions in loops.
3741 /// \param Pattern - combiner pattern
3742 bool AArch64InstrInfo::isThroughputPattern(
3743  MachineCombinerPattern Pattern) const {
3744  switch (Pattern) {
3745  default:
3746  break;
3781  return true;
3782  } // end switch (Pattern)
3783  return false;
3784 }
3785 /// Return true when there is potentially a faster code sequence for an
3786 /// instruction chain ending in \p Root. All potential patterns are listed in
3787 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3788 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3789 
3790 bool AArch64InstrInfo::getMachineCombinerPatterns(
3791  MachineInstr &Root,
3792  SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3793  // Integer patterns
3794  if (getMaddPatterns(Root, Patterns))
3795  return true;
3796  // Floating point patterns
3797  if (getFMAPatterns(Root, Patterns))
3798  return true;
3799 
3800  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3801 }
3802 
3804 /// genFusedMultiply - Generate fused multiply instructions.
3805 /// This function supports both integer and floating point instructions.
3806 /// A typical example:
3807 /// F|MUL I=A,B,0
3808 /// F|ADD R,I,C
3809 /// ==> F|MADD R,A,B,C
3810 /// \param MF Containing MachineFunction
3811 /// \param MRI Register information
3812 /// \param TII Target information
3813 /// \param Root is the F|ADD instruction
3814 /// \param [out] InsInstrs is a vector of machine instructions and will
3815 /// contain the generated madd instruction
3816 /// \param IdxMulOpd is index of operand in Root that is the result of
3817 /// the F|MUL. In the example above IdxMulOpd is 1.
3818 /// \param MaddOpc the opcode of the f|madd instruction
3819 /// \param RC Register class of operands
3820 /// \param kind Kind of FMA instruction (addressing mode) to be generated
3821 /// \param ReplacedAddend is the result register from the instruction
3822 /// replacing the non-combined operand, if any.
3823 static MachineInstr *
3824 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3825  const TargetInstrInfo *TII, MachineInstr &Root,
3826  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3827  unsigned MaddOpc, const TargetRegisterClass *RC,
3828  FMAInstKind kind = FMAInstKind::Default,
3829  const unsigned *ReplacedAddend = nullptr) {
3830  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3831 
3832  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3833  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3834  unsigned ResultReg = Root.getOperand(0).getReg();
3835  unsigned SrcReg0 = MUL->getOperand(1).getReg();
3836  bool Src0IsKill = MUL->getOperand(1).isKill();
3837  unsigned SrcReg1 = MUL->getOperand(2).getReg();
3838  bool Src1IsKill = MUL->getOperand(2).isKill();
3839 
3840  unsigned SrcReg2;
3841  bool Src2IsKill;
3842  if (ReplacedAddend) {
3843  // If we just generated a new addend, this must be its only use.
3844  SrcReg2 = *ReplacedAddend;
3845  Src2IsKill = true;
3846  } else {
3847  SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3848  Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3849  }
3850 
3852  MRI.constrainRegClass(ResultReg, RC);
3854  MRI.constrainRegClass(SrcReg0, RC);
3856  MRI.constrainRegClass(SrcReg1, RC);
3858  MRI.constrainRegClass(SrcReg2, RC);
3859 
3860  MachineInstrBuilder MIB;
3861  if (kind == FMAInstKind::Default)
3862  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3863  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3864  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3865  .addReg(SrcReg2, getKillRegState(Src2IsKill));
3866  else if (kind == FMAInstKind::Indexed)
3867  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3868  .addReg(SrcReg2, getKillRegState(Src2IsKill))
3869  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3870  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3871  .addImm(MUL->getOperand(3).getImm());
3872  else if (kind == FMAInstKind::Accumulator)
3873  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3874  .addReg(SrcReg2, getKillRegState(Src2IsKill))
3875  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3876  .addReg(SrcReg1, getKillRegState(Src1IsKill));
3877  else
3878  assert(false && "Invalid FMA instruction kind \n");
3879  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
3880  InsInstrs.push_back(MIB);
3881  return MUL;
3882 }
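// (Editorial illustration, not part of the original file.) The three
// FMAInstKind forms above differ only in operand order and in whether the
// FMUL's lane immediate is carried over. With hypothetical virtual registers
// %a, %b (the FMUL operands) and %c (the addend):
//   Default:     %r = FMADDSrrr %a, %b, %c             ; r = a*b + c
//   Indexed:     %r = FMLAv2i32_indexed %c, %a, %b, 1  ; accumulator first,
//                                                       ; plus the lane index
//   Accumulator: %r = FMLAv2f32 %c, %a, %b             ; accumulator first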
3883 
3884 /// genMaddR - Generate madd instruction and combine mul and add using
3885 /// an extra virtual register
3886 /// Example - an ADD intermediate needs to be stored in a register:
3887 /// MUL I=A,B,0
3888 /// ADD R,I,Imm
3889 /// ==> ORR V, ZR, Imm
3890 /// ==> MADD R,A,B,V
3891 /// \param MF Containing MachineFunction
3892 /// \param MRI Register information
3893 /// \param TII Target information
3894 /// \param Root is the ADD instruction
3895 /// \param [out] InsInstrs is a vector of machine instructions and will
3896 /// contain the generated madd instruction
3897 /// \param IdxMulOpd is index of operand in Root that is the result of
3898 /// the MUL. In the example above IdxMulOpd is 1.
3899 /// \param MaddOpc the opcode of the madd instruction
3900 /// \param VR is a virtual register that holds the value of an ADD operand
3901 /// (V in the example above).
3902 /// \param RC Register class of operands
3903 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
3904  const TargetInstrInfo *TII, MachineInstr &Root,
3905  SmallVectorImpl<MachineInstr *> &InsInstrs,
3906  unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
3907  const TargetRegisterClass *RC) {
3908  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3909 
3910  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3911  unsigned ResultReg = Root.getOperand(0).getReg();
3912  unsigned SrcReg0 = MUL->getOperand(1).getReg();
3913  bool Src0IsKill = MUL->getOperand(1).isKill();
3914  unsigned SrcReg1 = MUL->getOperand(2).getReg();
3915  bool Src1IsKill = MUL->getOperand(2).isKill();
3916 
3918  MRI.constrainRegClass(ResultReg, RC);
3920  MRI.constrainRegClass(SrcReg0, RC);
3922  MRI.constrainRegClass(SrcReg1, RC);
3924  MRI.constrainRegClass(VR, RC);
3925 
3926  MachineInstrBuilder MIB =
3927  BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3928  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3929  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3930  .addReg(VR);
3931  // Insert the MADD
3932  InsInstrs.push_back(MIB);
3933  return MUL;
3934 }
3935 
3936 /// When getMachineCombinerPatterns() finds potential patterns,
3937 /// this function generates the instructions that could replace the
3938 /// original code sequence
3939 void AArch64InstrInfo::genAlternativeCodeSequence(
3940  MachineInstr &Root, MachineCombinerPattern Pattern,
3941  SmallVectorImpl<MachineInstr *> &InsInstrs,
3942  SmallVectorImpl<MachineInstr *> &DelInstrs,
3943  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
3944  MachineBasicBlock &MBB = *Root.getParent();
3945  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3946  MachineFunction &MF = *MBB.getParent();
3947  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
3948 
3949  MachineInstr *MUL;
3950  const TargetRegisterClass *RC;
3951  unsigned Opc;
3952  switch (Pattern) {
3953  default:
3954  // Reassociate instructions.
3955  TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
3956  DelInstrs, InstrIdxForVirtReg);
3957  return;
3960  // MUL I=A,B,0
3961  // ADD R,I,C
3962  // ==> MADD R,A,B,C
3963  // --- Create(MADD);
3964  if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
3965  Opc = AArch64::MADDWrrr;
3966  RC = &AArch64::GPR32RegClass;
3967  } else {
3968  Opc = AArch64::MADDXrrr;
3969  RC = &AArch64::GPR64RegClass;
3970  }
3971  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3972  break;
3975  // MUL I=A,B,0
3976  // ADD R,C,I
3977  // ==> MADD R,A,B,C
3978  // --- Create(MADD);
3979  if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
3980  Opc = AArch64::MADDWrrr;
3981  RC = &AArch64::GPR32RegClass;
3982  } else {
3983  Opc = AArch64::MADDXrrr;
3984  RC = &AArch64::GPR64RegClass;
3985  }
3986  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3987  break;
3990  // MUL I=A,B,0
3991  // ADD R,I,Imm
3992  // ==> ORR V, ZR, Imm
3993  // ==> MADD R,A,B,V
3994  // --- Create(MADD);
3995  const TargetRegisterClass *OrrRC;
3996  unsigned BitSize, OrrOpc, ZeroReg;
3997  if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
3998  OrrOpc = AArch64::ORRWri;
3999  OrrRC = &AArch64::GPR32spRegClass;
4000  BitSize = 32;
4001  ZeroReg = AArch64::WZR;
4002  Opc = AArch64::MADDWrrr;
4003  RC = &AArch64::GPR32RegClass;
4004  } else {
4005  OrrOpc = AArch64::ORRXri;
4006  OrrRC = &AArch64::GPR64spRegClass;
4007  BitSize = 64;
4008  ZeroReg = AArch64::XZR;
4009  Opc = AArch64::MADDXrrr;
4010  RC = &AArch64::GPR64RegClass;
4011  }
4012  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4013  uint64_t Imm = Root.getOperand(2).getImm();
4014 
4015  if (Root.getOperand(3).isImm()) {
4016  unsigned Val = Root.getOperand(3).getImm();
4017  Imm = Imm << Val;
4018  }
4019  uint64_t UImm = SignExtend64(Imm, BitSize);
4020  uint64_t Encoding;
4021  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4022  MachineInstrBuilder MIB1 =
4023  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4024  .addReg(ZeroReg)
4025  .addImm(Encoding);
4026  InsInstrs.push_back(MIB1);
4027  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4028  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4029  }
4030  break;
4031  }
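  // (Editorial worked example, not part of the original file.) Suppose the
  // root is "%r = ADDWri %mul, 4, 12", i.e. "add w0, w1, #4, lsl #12". Then
  // Imm = 4 << 12 = 0x4000, SignExtend64(0x4000, 32) = 0x4000, and 0x4000 is
  // a valid 32-bit logical immediate, so processLogicalImmediate() succeeds
  // and the combiner emits
  //   ORRWri   %v, $wzr, <encoding of 0x4000>
  //   MADDWrrr %r, %a, %b, %v
  // If the shifted constant is not encodable as a logical immediate, nothing
  // is pushed to InsInstrs and the combiner discards this pattern.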
4034  // MUL I=A,B,0
4035  // SUB R,I, C
4036  // ==> SUB V, 0, C
4037  // ==> MADD R,A,B,V // = -C + A*B
4038  // --- Create(MADD);
4039  const TargetRegisterClass *SubRC;
4040  unsigned SubOpc, ZeroReg;
4041  if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4042  SubOpc = AArch64::SUBWrr;
4043  SubRC = &AArch64::GPR32spRegClass;
4044  ZeroReg = AArch64::WZR;
4045  Opc = AArch64::MADDWrrr;
4046  RC = &AArch64::GPR32RegClass;
4047  } else {
4048  SubOpc = AArch64::SUBXrr;
4049  SubRC = &AArch64::GPR64spRegClass;
4050  ZeroReg = AArch64::XZR;
4051  Opc = AArch64::MADDXrrr;
4052  RC = &AArch64::GPR64RegClass;
4053  }
4054  unsigned NewVR = MRI.createVirtualRegister(SubRC);
4055  // SUB NewVR, 0, C
4056  MachineInstrBuilder MIB1 =
4057  BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4058  .addReg(ZeroReg)
4059  .add(Root.getOperand(2));
4060  InsInstrs.push_back(MIB1);
4061  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4062  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4063  break;
4064  }
4067  // MUL I=A,B,0
4068  // SUB R,C,I
4069  // ==> MSUB R,A,B,C (computes C - A*B)
4070  // --- Create(MSUB);
4071  if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4072  Opc = AArch64::MSUBWrrr;
4073  RC = &AArch64::GPR32RegClass;
4074  } else {
4075  Opc = AArch64::MSUBXrrr;
4076  RC = &AArch64::GPR64RegClass;
4077  }
4078  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4079  break;
4082  // MUL I=A,B,0
4083  // SUB R,I, Imm
4084  // ==> ORR V, ZR, -Imm
4085  // ==> MADD R,A,B,V // = -Imm + A*B
4086  // --- Create(MADD);
4087  const TargetRegisterClass *OrrRC;
4088  unsigned BitSize, OrrOpc, ZeroReg;
4089  if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4090  OrrOpc = AArch64::ORRWri;
4091  OrrRC = &AArch64::GPR32spRegClass;
4092  BitSize = 32;
4093  ZeroReg = AArch64::WZR;
4094  Opc = AArch64::MADDWrrr;
4095  RC = &AArch64::GPR32RegClass;
4096  } else {
4097  OrrOpc = AArch64::ORRXri;
4098  OrrRC = &AArch64::GPR64spRegClass;
4099  BitSize = 64;
4100  ZeroReg = AArch64::XZR;
4101  Opc = AArch64::MADDXrrr;
4102  RC = &AArch64::GPR64RegClass;
4103  }
4104  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4105  uint64_t Imm = Root.getOperand(2).getImm();
4106  if (Root.getOperand(3).isImm()) {
4107  unsigned Val = Root.getOperand(3).getImm();
4108  Imm = Imm << Val;
4109  }
4110  uint64_t UImm = SignExtend64(-Imm, BitSize);
4111  uint64_t Encoding;
4112  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4113  MachineInstrBuilder MIB1 =
4114  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4115  .addReg(ZeroReg)
4116  .addImm(Encoding);
4117  InsInstrs.push_back(MIB1);
4118  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4119  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4120  }
4121  break;
4122  }
4123  // Floating Point Support
4126  // MUL I=A,B,0
4127  // ADD R,I,C
4128  // ==> MADD R,A,B,C
4129  // --- Create(MADD);
4130  if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4131  Opc = AArch64::FMADDSrrr;
4132  RC = &AArch64::FPR32RegClass;
4133  } else {
4134  Opc = AArch64::FMADDDrrr;
4135  RC = &AArch64::FPR64RegClass;
4136  }
4137  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4138  break;
4141  // FMUL I=A,B,0
4142  // FADD R,C,I
4143  // ==> FMADD R,A,B,C
4144  // --- Create(FMADD);
4145  if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4146  Opc = AArch64::FMADDSrrr;
4147  RC = &AArch64::FPR32RegClass;
4148  } else {
4149  Opc = AArch64::FMADDDrrr;
4150  RC = &AArch64::FPR64RegClass;
4151  }
4152  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4153  break;
4154 
4156  Opc = AArch64::FMLAv1i32_indexed;
4157  RC = &AArch64::FPR32RegClass;
4158  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4160  break;
4162  Opc = AArch64::FMLAv1i32_indexed;
4163  RC = &AArch64::FPR32RegClass;
4164  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4166  break;
4167 
4169  Opc = AArch64::FMLAv1i64_indexed;
4170  RC = &AArch64::FPR64RegClass;
4171  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4173  break;
4175  Opc = AArch64::FMLAv1i64_indexed;
4176  RC = &AArch64::FPR64RegClass;
4177  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4179  break;
4180 
4183  RC = &AArch64::FPR64RegClass;
4185  Opc = AArch64::FMLAv2i32_indexed;
4186  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4188  } else {
4189  Opc = AArch64::FMLAv2f32;
4190  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4192  }
4193  break;
4196  RC = &AArch64::FPR64RegClass;
4198  Opc = AArch64::FMLAv2i32_indexed;
4199  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4201  } else {
4202  Opc = AArch64::FMLAv2f32;
4203  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4205  }
4206  break;
4207 
4210  RC = &AArch64::FPR128RegClass;
4212  Opc = AArch64::FMLAv2i64_indexed;
4213  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4215  } else {
4216  Opc = AArch64::FMLAv2f64;
4217  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4219  }
4220  break;
4223  RC = &AArch64::FPR128RegClass;
4225  Opc = AArch64::FMLAv2i64_indexed;
4226  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4228  } else {
4229  Opc = AArch64::FMLAv2f64;
4230  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4232  }
4233  break;
4234 
4237  RC = &AArch64::FPR128RegClass;
4239  Opc = AArch64::FMLAv4i32_indexed;
4240  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4242  } else {
4243  Opc = AArch64::FMLAv4f32;
4244  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4246  }
4247  break;
4248 
4251  RC = &AArch64::FPR128RegClass;
4253  Opc = AArch64::FMLAv4i32_indexed;
4254  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4256  } else {
4257  Opc = AArch64::FMLAv4f32;
4258  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4260  }
4261  break;
4262 
4265  // FMUL I=A,B,0
4266  // FSUB R,I,C
4267  // ==> FNMSUB R,A,B,C // = -C + A*B
4268  // --- Create(FNMSUB);
4269  if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4270  Opc = AArch64::FNMSUBSrrr;
4271  RC = &AArch64::FPR32RegClass;
4272  } else {
4273  Opc = AArch64::FNMSUBDrrr;
4274  RC = &AArch64::FPR64RegClass;
4275  }
4276  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4277  break;
4278  }
4279 
4282  // FNMUL I=A,B,0
4283  // FSUB R,I,C
4284  // ==> FNMADD R,A,B,C // = -A*B - C
4285  // --- Create(FNMADD);
4286  if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4287  Opc = AArch64::FNMADDSrrr;
4288  RC = &AArch64::FPR32RegClass;
4289  } else {
4290  Opc = AArch64::FNMADDDrrr;
4291  RC = &AArch64::FPR64RegClass;
4292  }
4293  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4294  break;
4295  }
4296 
4299  // FMUL I=A,B,0
4300  // FSUB R,C,I
4301  // ==> FMSUB R,A,B,C (computes C - A*B)
4302  // --- Create(FMSUB);
4303  if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4304  Opc = AArch64::FMSUBSrrr;
4305  RC = &AArch64::FPR32RegClass;
4306  } else {
4307  Opc = AArch64::FMSUBDrrr;
4308  RC = &AArch64::FPR64RegClass;
4309  }
4310  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4311  break;
4312  }
4313 
4315  Opc = AArch64::FMLSv1i32_indexed;
4316  RC = &AArch64::FPR32RegClass;
4317  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4319  break;
4320 
4322  Opc = AArch64::FMLSv1i64_indexed;
4323  RC = &AArch64::FPR64RegClass;
4324  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4326  break;
4327 
4330  RC = &AArch64::FPR64RegClass;
4332  Opc = AArch64::FMLSv2i32_indexed;
4333  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4335  } else {
4336  Opc = AArch64::FMLSv2f32;
4337  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4339  }
4340  break;
4341 
4344  RC = &AArch64::FPR128RegClass;
4346  Opc = AArch64::FMLSv2i64_indexed;
4347  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4349  } else {
4350  Opc = AArch64::FMLSv2f64;
4351  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4353  }
4354  break;
4355 
4358  RC = &AArch64::FPR128RegClass;
4360  Opc = AArch64::FMLSv4i32_indexed;
4361  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4363  } else {
4364  Opc = AArch64::FMLSv4f32;
4365  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4367  }
4368  break;
4371  RC = &AArch64::FPR64RegClass;
4372  unsigned NewVR = MRI.createVirtualRegister(RC);
4373  MachineInstrBuilder MIB1 =
4374  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4375  .add(Root.getOperand(2));
4376  InsInstrs.push_back(MIB1);
4377  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4379  Opc = AArch64::FMLAv2i32_indexed;
4380  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4381  FMAInstKind::Indexed, &NewVR);
4382  } else {
4383  Opc = AArch64::FMLAv2f32;
4384  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4385  FMAInstKind::Accumulator, &NewVR);
4386  }
4387  break;
4388  }
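  // (Editorial illustration, not part of the original file.) For
  // FMLSv2f32_OP1 the root computes R = A*B - C with the FMUL feeding
  // operand 1. FMLS itself computes Acc - A*B, so it cannot express this
  // form directly; instead the addend is negated and FMLA is used:
  //   %5 = FMULv2f32 %1, %2
  //   %6 = FSUBv2f32 %5, %3        ; Root: A*B - C
  // ==>
  //   %7 = FNEGv2f32 %3            ; MIB1 above
  //   %6 = FMLAv2f32 %7, %1, %2    ; (-C) + A*B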
4391  RC = &AArch64::FPR128RegClass;
4392  unsigned NewVR = MRI.createVirtualRegister(RC);
4393  MachineInstrBuilder MIB1 =
4394  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4395  .add(Root.getOperand(2));
4396  InsInstrs.push_back(MIB1);
4397  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4399  Opc = AArch64::FMLAv4i32_indexed;
4400  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4401  FMAInstKind::Indexed, &NewVR);
4402  } else {
4403  Opc = AArch64::FMLAv4f32;
4404  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4405  FMAInstKind::Accumulator, &NewVR);
4406  }
4407  break;
4408  }
4411  RC = &AArch64::FPR128RegClass;
4412  unsigned NewVR = MRI.createVirtualRegister(RC);
4413  MachineInstrBuilder MIB1 =
4414  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4415  .add(Root.getOperand(2));
4416  InsInstrs.push_back(MIB1);
4417  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4419  Opc = AArch64::FMLAv2i64_indexed;
4420  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4421  FMAInstKind::Indexed, &NewVR);
4422  } else {
4423  Opc = AArch64::FMLAv2f64;
4424  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4425  FMAInstKind::Accumulator, &NewVR);
4426  }
4427  break;
4428  }
4429  } // end switch (Pattern)
4430  // Record MUL and ADD/SUB for deletion
4431  DelInstrs.push_back(MUL);
4432  DelInstrs.push_back(&Root);
4433 }
4434 
4435 /// \brief Replace csincr-branch sequence by simple conditional branch
4436 ///
4437 /// Examples:
4438 /// 1. \code
4439 /// csinc w9, wzr, wzr, <condition code>
4440 /// tbnz w9, #0, 0x44
4441 /// \endcode
4442 /// to
4443 /// \code
4444 /// b.<inverted condition code>
4445 /// \endcode
4446 ///
4447 /// 2. \code
4448 /// csinc w9, wzr, wzr, <condition code>
4449 /// tbz w9, #0, 0x44
4450 /// \endcode
4451 /// to
4452 /// \code
4453 /// b.<condition code>
4454 /// \endcode
4455 ///
4456 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4457 /// compare's constant operand is power of 2.
4458 ///
4459 /// Examples:
4460 /// \code
4461 /// and w8, w8, #0x400
4462 /// cbnz w8, L1
4463 /// \endcode
4464 /// to
4465 /// \code
4466 /// tbnz w8, #10, L1
4467 /// \endcode
4468 ///
4469 /// \param MI Conditional Branch
4470 /// \return True when the simple conditional branch is generated
4471 ///
4472 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4473  bool IsNegativeBranch = false;
4474  bool IsTestAndBranch = false;
4475  unsigned TargetBBInMI = 0;
4476  switch (MI.getOpcode()) {
4477  default:
4478  llvm_unreachable("Unknown branch instruction?");
4479  case AArch64::Bcc:
4480  return false;
4481  case AArch64::CBZW:
4482  case AArch64::CBZX:
4483  TargetBBInMI = 1;
4484  break;
4485  case AArch64::CBNZW:
4486  case AArch64::CBNZX:
4487  TargetBBInMI = 1;
4488  IsNegativeBranch = true;
4489  break;
4490  case AArch64::TBZW:
4491  case AArch64::TBZX:
4492  TargetBBInMI = 2;
4493  IsTestAndBranch = true;
4494  break;
4495  case AArch64::TBNZW:
4496  case AArch64::TBNZX:
4497  TargetBBInMI = 2;
4498  IsNegativeBranch = true;
4499  IsTestAndBranch = true;
4500  break;
4501  }
4502  // So we increment a zero register and test for bits other
4503  // than bit 0? Conservatively bail out in case the verifier
4504  // missed this case.
4505  if (IsTestAndBranch && MI.getOperand(1).getImm())
4506  return false;
4507 
4508  // Find Definition.
4509  assert(MI.getParent() && "Incomplete machine instruction\n");
4510  MachineBasicBlock *MBB = MI.getParent();
4511  MachineFunction *MF = MBB->getParent();
4512  MachineRegisterInfo *MRI = &MF->getRegInfo();
4513  unsigned VReg = MI.getOperand(0).getReg();
4514  if (!TargetRegisterInfo::isVirtualRegister(VReg))
4515  return false;
4516 
4517  MachineInstr *DefMI = MRI->getVRegDef(VReg);
4518 
4519  // Look through COPY instructions to find definition.
4520  while (DefMI->isCopy()) {
4521  unsigned CopyVReg = DefMI->getOperand(1).getReg();
4522  if (!MRI->hasOneNonDBGUse(CopyVReg))
4523  return false;
4524  if (!MRI->hasOneDef(CopyVReg))
4525  return false;
4526  DefMI = MRI->getVRegDef(CopyVReg);
4527  }
4528 
4529  switch (DefMI->getOpcode()) {
4530  default:
4531  return false;
4532  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4533  case AArch64::ANDWri:
4534  case AArch64::ANDXri: {
4535  if (IsTestAndBranch)
4536  return false;
4537  if (DefMI->getParent() != MBB)
4538  return false;
4539  if (!MRI->hasOneNonDBGUse(VReg))
4540  return false;
4541 
4542  bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4543  uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4544  DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4545  if (!isPowerOf2_64(Mask))
4546  return false;
4547 
4548  MachineOperand &MO = DefMI->getOperand(1);
4549  unsigned NewReg = MO.getReg();
4550  if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4551  return false;
4552 
4553  assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4554 
4555  MachineBasicBlock &RefToMBB = *MBB;
4556  MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4557  DebugLoc DL = MI.getDebugLoc();
4558  unsigned Imm = Log2_64(Mask);
4559  unsigned Opc = (Imm < 32)
4560  ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4561  : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4562  MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4563  .addReg(NewReg)
4564  .addImm(Imm)
4565  .addMBB(TBB);
4566  // Register lives on into the TBZ/TBNZ now.
4567  MO.setIsKill(false);
4568 
4569  // For bit indices smaller than 32, we need to use the 32-bit
4570  // variant (W) in all cases, because the 64-bit variant cannot
4571  // encode them.
4572  // Therefore, if the input register is 64-bit, we need to take the
4573  // 32-bit sub-part.
4574  if (!Is32Bit && Imm < 32)
4575  NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4576  MI.eraseFromParent();
4577  return true;
4578  }
4579  // Look for CSINC
4580  case AArch64::CSINCWr:
4581  case AArch64::CSINCXr: {
4582  if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4583  DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4584  !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4585  DefMI->getOperand(2).getReg() == AArch64::XZR))
4586  return false;
4587 
4588  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4589  return false;
4590 
4591  AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4592  // Convert only when the condition code is not modified between
4593  // the CSINC and the branch. The CC may be used by other
4594  // instructions in between.
4595  if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4596  return false;
4597  MachineBasicBlock &RefToMBB = *MBB;
4598  MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4599  DebugLoc DL = MI.getDebugLoc();
4600  if (IsNegativeBranch)
4601  CC = AArch64CC::getInvertedCondCode(CC);
4602  BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4603  MI.eraseFromParent();
4604  return true;
4605  }
4606  }
4607 }
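// (Editorial sketch, not part of the original file.) For the ANDWri/ANDXri
// case above, note that the AND's operand 2 holds the *encoded* logical
// immediate, which is why it is decoded before the power-of-two test. Once
// decoded, the TB(N)Z bit index is simply the log2 of the single set bit.
// The helper name below is invented for illustration only.
static unsigned LLVM_ATTRIBUTE_UNUSED tbzBitIndexForMask(uint64_t Mask) {
  assert(isPowerOf2_64(Mask) && "Expected a single-bit mask");
  return Log2_64(Mask); // e.g. Mask == 0x400 --> bit index 10.
}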
4608 
4609 std::pair<unsigned, unsigned>
4610 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4611  const unsigned Mask = AArch64II::MO_FRAGMENT;
4612  return std::make_pair(TF & Mask, TF & ~Mask);
4613 }
4614 
4615 ArrayRef<std::pair<unsigned, const char *>>
4616 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4617  using namespace AArch64II;
4618 
4619  static const std::pair<unsigned, const char *> TargetFlags[] = {
4620  {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4621  {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4622  {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
4623  {MO_HI12, "aarch64-hi12"}};
4624  return makeArrayRef(TargetFlags);
4625 }
4626 
4627 ArrayRef<std::pair<unsigned, const char *>>
4628 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4629  using namespace AArch64II;
4630 
4631  static const std::pair<unsigned, const char *> TargetFlags[] = {
4632  {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
4633  return makeArrayRef(TargetFlags);
4634 }
4635 
4636 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4637 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4638  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4639  {{MOSuppressPair, "aarch64-suppress-pair"},
4640  {MOStridedAccess, "aarch64-strided-access"}};
4641  return makeArrayRef(TargetFlags);
4642 }
4643 
4644  /// Constants defining how certain sequences should be outlined.
4645  /// This encompasses how an outlined function should be called, and what kind of
4646  /// frame should be emitted for that outlined function.
4647  ///
4648  /// \p MachineOutlinerDefault implies that the function should be called with
4649  /// a save and restore of LR to the stack.
4650  ///
4651  /// That is,
4652  ///
4653  /// I1 Save LR OUTLINED_FUNCTION:
4654  /// I2 --> BL OUTLINED_FUNCTION I1
4655  /// I3 Restore LR I2
4656  /// I3
4657  /// RET
4658  ///
4659  /// * Call construction overhead: 3 (save + BL + restore)
4660  /// * Frame construction overhead: 1 (ret)
4661  /// * Requires stack fixups? Yes
4662  ///
4663  /// \p MachineOutlinerTailCall implies that the function is being created from
4664  /// a sequence of instructions ending in a return.
4665  ///
4666  /// That is,
4667  ///
4668  /// I1 OUTLINED_FUNCTION:
4669  /// I2 --> B OUTLINED_FUNCTION I1
4670  /// RET I2
4671  /// RET
4672  ///
4673  /// * Call construction overhead: 1 (B)
4674  /// * Frame construction overhead: 0 (Return included in sequence)
4675  /// * Requires stack fixups? No
4676  ///
4677  /// \p MachineOutlinerNoLRSave implies that the function should be called using
4678  /// a BL instruction, but doesn't require LR to be saved and restored. This
4679  /// happens when LR is known to be dead.
4680  ///
4681  /// That is,
4682  ///
4683  /// I1 OUTLINED_FUNCTION:
4684  /// I2 --> BL OUTLINED_FUNCTION I1
4685  /// I3 I2
4686  /// I3
4687  /// RET
4688  ///
4689  /// * Call construction overhead: 1 (BL)
4690  /// * Frame construction overhead: 1 (RET)
4691  /// * Requires stack fixups? No
4692  ///
4693 enum MachineOutlinerClass {
4694  MachineOutlinerDefault, /// Emit a save, restore, call, and return.
4695  MachineOutlinerTailCall, /// Only emit a branch.
4696  MachineOutlinerNoLRSave /// Emit a call and return.
4697 };
4698 
4699 enum MachineOutlinerMBBFlags {
4700  LRUnavailableSomewhere = 0x2,
4701  HasCalls = 0x4
4702 };
4703 
4704 bool AArch64InstrInfo::canOutlineWithoutLRSave(
4705  MachineBasicBlock::iterator &CallInsertionPt) const {
4706  // Was LR saved in the function containing this basic block?
4707  MachineBasicBlock &MBB = *(CallInsertionPt->getParent());
4708  LiveRegUnits LRU(getRegisterInfo());
4709  LRU.addLiveOuts(MBB);
4710 
4711  // Get liveness information from the end of the block to the end of the
4712  // prospective outlined region.
4713  std::for_each(MBB.rbegin(),
4714  (MachineBasicBlock::reverse_iterator)CallInsertionPt,
4715  [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); });
4716 
4717  // If the link register is available at this point, then we can safely outline
4718  // the region without saving/restoring LR. Otherwise, we must emit a save and
4719  // restore.
4720  return LRU.available(AArch64::LR);
4721 }
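// (Editorial sketch, not part of the original file.) The same backwards-walk
// idiom used above generalizes to asking whether an arbitrary register is
// free at a given point; a hypothetical helper might look like this:
static bool LLVM_ATTRIBUTE_UNUSED
isRegisterFreeAt(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB,
                 MachineBasicBlock::iterator Pt, unsigned Reg) {
  LiveRegUnits LRU(TRI);
  LRU.addLiveOuts(MBB);
  // Walk from the block's end back towards Pt, updating liveness as we go.
  for (auto I = MBB.rbegin(), E = MachineBasicBlock::reverse_iterator(Pt);
       I != E; ++I)
    LRU.stepBackward(*I);
  return LRU.available(Reg);
}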
4722 
4723 AArch64GenInstrInfo::MachineOutlinerInfo
4724 AArch64InstrInfo::getOutliningCandidateInfo(
4725  std::vector<
4726  std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
4727  &RepeatedSequenceLocs) const {
4728 
4729  unsigned CallID = MachineOutlinerDefault;
4730  unsigned FrameID = MachineOutlinerDefault;
4731  unsigned NumInstrsForCall = 3;
4732  unsigned NumInstrsToCreateFrame = 1;
4733 
4734  auto DoesntNeedLRSave =
4735  [this](std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>
4736  &I) { return canOutlineWithoutLRSave(I.second); };
4737 
4738  // If the last instruction in any candidate is a terminator, then we should
4739  // tail call all of the candidates.
4740  if (RepeatedSequenceLocs[0].second->isTerminator()) {
4741  CallID = MachineOutlinerTailCall;
4742  FrameID = MachineOutlinerTailCall;
4743  NumInstrsForCall = 1;
4744  NumInstrsToCreateFrame = 0;
4745  }
4746 
4747  else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4748  DoesntNeedLRSave)) {
4749  CallID = MachineOutlinerNoLRSave;
4750  FrameID = MachineOutlinerNoLRSave;
4751  NumInstrsForCall = 1;
4752  NumInstrsToCreateFrame = 1;
4753  }
4754 
4755  // Check if the range contains a call. These require a save + restore of the
4756  // link register.
4757  if (std::any_of(RepeatedSequenceLocs[0].first, RepeatedSequenceLocs[0].second,
4758  [](const MachineInstr &MI) { return MI.isCall(); }))
4759  NumInstrsToCreateFrame += 2; // Save + restore the link register.
4760 
4761  // Handle the last instruction separately. If this is a tail call, then the
4762  // last instruction is a call. We don't want to save + restore in this case.
4763  // However, it could be possible that the last instruction is a call without
4764  // it being valid to tail call this sequence. We should consider this as well.
4765  else if (RepeatedSequenceLocs[0].second->isCall() &&
4766  FrameID != MachineOutlinerTailCall)
4767  NumInstrsToCreateFrame += 2;
4768 
4769  return MachineOutlinerInfo(NumInstrsForCall, NumInstrsToCreateFrame, CallID,
4770  FrameID);
4771 }
4772 
4773 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
4774  MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
4775  const Function &F = MF.getFunction();
4776 
4777  // If F uses a redzone, then don't outline from it because it might mess up
4778  // the stack.
4779  if (!F.hasFnAttribute(Attribute::NoRedZone))
4780  return false;
4781 
4782  // Can F be deduplicated by the linker? If it can, don't outline from it.
4783  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
4784  return false;
4785 
4786  return true;
4787 }
4788 
4789 unsigned
4790 AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
4791  unsigned Flags = 0x0;
4792  // Check if there's a call inside this MachineBasicBlock. If there is, then
4793  // set a flag.
4794  if (std::any_of(MBB.begin(), MBB.end(),
4795  [](MachineInstr &MI) { return MI.isCall(); }))
4796  Flags |= MachineOutlinerMBBFlags::HasCalls;
4797 
4798  // Check if LR is available through all of the MBB. If it's not, then set
4799  // a flag.
4800  LiveRegUnits LRU(getRegisterInfo());
4801  LRU.addLiveOuts(MBB);
4802 
4803  std::for_each(MBB.rbegin(),
4804  MBB.rend(),
4805  [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
4806 
4807  if (!LRU.available(AArch64::LR))
4808  Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
4809 
4810  return Flags;
4811 }
4812 
4813 AArch64GenInstrInfo::MachineOutlinerInstrType
4814 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
4815  unsigned Flags) const {
4816  MachineInstr &MI = *MIT;
4817  MachineBasicBlock *MBB = MI.getParent();
4818  MachineFunction *MF = MBB->getParent();
4819  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
4820 
4821  // Don't outline LOHs.
4822  if (FuncInfo->getLOHRelated().count(&MI))
4823  return MachineOutlinerInstrType::Illegal;
4824 
4825  // Don't allow debug values to impact outlining type.
4826  if (MI.isDebugValue() || MI.isIndirectDebugValue())
4827  return MachineOutlinerInstrType::Invisible;
4828 
4829  // Is this a terminator for a basic block?
4830  if (MI.isTerminator()) {
4831 
4832  // Is this the end of a function?
4833  if (MI.getParent()->succ_empty())
4834  return MachineOutlinerInstrType::Legal;
4835 
4836  // It's not, so don't outline it.
4837  return MachineOutlinerInstrType::Illegal;
4838  }
4839 
4840  // Special cases for instructions that can always be outlined, but will fail
4841  // the later tests, e.g. ADRPs. These are PC-relative, but can always be
4842  // outlined because they don't require a *specific* value to be in LR.
4843  if (MI.getOpcode() == AArch64::ADRP)
4844  return MachineOutlinerInstrType::Legal;
4845 
4846  // Outline calls without stack parameters or aggregate parameters.
4847  if (MI.isCall()) {
4848  const Module *M = MF->getFunction().getParent();
4849  assert(M && "No module?");
4850 
4851  // Get the function associated with the call. Look at each operand and find
4852  // the one that represents the callee and get its name.
4853  Function *Callee = nullptr;
4854  for (const MachineOperand &MOP : MI.operands()) {
4855  if (MOP.isSymbol()) {
4856  Callee = M->getFunction(MOP.getSymbolName());
4857  break;
4858  }
4859 
4860  else if (MOP.isGlobal()) {
4861  Callee = M->getFunction(MOP.getGlobal()->getGlobalIdentifier());
4862  break;
4863  }
4864  }
4865 
4866  // Only handle functions that we have information about.
4867  if (!Callee)
4868  return MachineOutlinerInstrType::Illegal;
4869 
4870  // We have a function we have information about. Check if it's something
4871  // we can safely outline.
4872 
4873  // If the callee is vararg, it passes parameters on the stack. Don't touch
4874  // it.
4875  // FIXME: Functions like printf are very common and we should be able to
4876  // outline them.
4877  if (Callee->isVarArg())
4878  return MachineOutlinerInstrType::Illegal;
4879 
4880  // Check if any of the arguments are a pointer to a struct. We don't want
4881  // to outline these since they might be loaded in two instructions.
4882  for (Argument &Arg : Callee->args()) {
4883  if (Arg.getType()->isPointerTy() &&
4884  Arg.getType()->getPointerElementType()->isAggregateType())
4885  return MachineOutlinerInstrType::Illegal;
4886  }
4887 
4888  // If the thing we're calling doesn't access memory at all, then we're good
4889  // to go.
4890  if (Callee->doesNotAccessMemory())
4891  return MachineOutlinerInstrType::Legal;
4892 
4893 
4894  // It accesses memory. Get the machine function for the callee to see if
4895  // it's safe to outline.
4896  MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
4897 
4898  // We don't know what's going on with the callee at all. Don't touch it.
4899  if (!CalleeMF)
4900  return MachineOutlinerInstrType::Illegal;
4901 
4902  // Does it pass anything on the stack? If it does, don't outline it.
4903  if (CalleeMF->getInfo<AArch64FunctionInfo>()->getBytesInStackArgArea() != 0)
4904  return MachineOutlinerInstrType::Illegal;
4905 
4906  // It doesn't, so it's safe to outline and we're done.
4907  return MachineOutlinerInstrType::Legal;
4908  }
4909 
4910  // Don't outline positions.
4911  if (MI.isPosition())
4912  return MachineOutlinerInstrType::Illegal;
4913 
4914  // Don't touch the link register or W30.
4915  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
4916  MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
4917  return MachineOutlinerInstrType::Illegal;
4918 
4919  // Make sure none of the operands are un-outlinable.
4920  for (const MachineOperand &MOP : MI.operands()) {
4921  if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
4922  MOP.isTargetIndex())
4923  return MachineOutlinerInstrType::Illegal;
4924 
4925  // Don't outline anything that uses the link register.
4926  if (MOP.isReg() && getRegisterInfo().regsOverlap(MOP.getReg(), AArch64::LR))
4927  return MachineOutlinerInstrType::Illegal;
4928  }
4929 
4930  // Does this use the stack?
4931  if (MI.modifiesRegister(AArch64::SP, &RI) ||
4932  MI.readsRegister(AArch64::SP, &RI)) {
4933  // True if there is no chance that any outlined candidate from this range
4934  // could require stack fixups. That is, both
4935  // * LR is available in the range (No save/restore around call)
4936  // * The range doesn't include calls (No save/restore in outlined frame)
4937  // are true.
4938  bool MightNeedStackFixUp =
4939  (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
4940  MachineOutlinerMBBFlags::HasCalls));
4941 
4942  // If this instruction is in a range where it *never* needs to be fixed
4943  // up, then we can *always* outline it. This is true even if it's not
4944  // possible to fix that instruction up.
4945  //
4946  // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
4947  // use SP. Suppose that I1 sits within a range that definitely doesn't
4948  // need stack fixups, while I2 sits in a range that does.
4949  //
4950  // First, I1 can be outlined as long as we *never* fix up the stack in
4951  // any sequence containing it. I1 is already a safe instruction in the
4952  // original program, so as long as we don't modify it we're good to go.
4953  // So this leaves us with showing that outlining I2 won't break our
4954  // program.
4955  //
4956  // Suppose I1 and I2 belong to equivalent candidate sequences. When we
4957  // look at I2, we need to see if it can be fixed up. Suppose I2, (and
4958  // thus I1) cannot be fixed up. Then I2 will be assigned a unique
4959  // integer label; thus, I2 cannot belong to any candidate sequence (a
4960  // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
4961  // as well, so we're good. Thus, I1 is always safe to outline.
4962  //
4963  // This gives us two things: first off, it buys us some more instructions
4964  // for our search space by deeming stack instructions illegal only when
4965  // they can't be fixed up AND we might have to fix them up. Second off,
4966  // this allows us to catch tricky instructions like, say,
4967  // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
4968  // be paired with later SUBXris, which might *not* end up being outlined.
4969  // If we mess with the stack to save something, then an ADDXri messes with
4970  // it *after*, then we aren't going to restore the right something from
4971  // the stack if we don't outline the corresponding SUBXri first. ADDXris and
4972  // SUBXris are extremely common in prologue/epilogue code, so supporting
4973  // them in the outliner can be a pretty big win!
4974  if (!MightNeedStackFixUp)
4975  return MachineOutlinerInstrType::Legal;
4976 
4977  // At this point, we have a stack instruction that we might need to fix
4978  // up. We'll handle it if it's a load or store.
4979  if (MI.mayLoadOrStore()) {
4980  unsigned Base; // Filled with the base register of MI.
4981  int64_t Offset; // Filled with the offset of MI.
4982  unsigned DummyWidth;
4983 
4984  // Does it allow us to offset the base register and is the base SP?
4985  if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
4986  Base != AArch64::SP)
4987  return MachineOutlinerInstrType::Illegal;
4988 
4989  // Find the minimum/maximum offset for this instruction and check if
4990  // fixing it up would be in range.
4991  int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
4992  unsigned Scale; // The scale to multiply the offsets by.
4993  getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
4994 
4995  // TODO: We should really test what happens if an instruction overflows.
4996  // This is tricky to test with IR tests, but when the outliner is moved
4997  // to a MIR test, it really ought to be checked.
4998  Offset += 16; // Update the offset to what it would be if we outlined.
4999  if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
5000  return MachineOutlinerInstrType::Illegal;
5001 
5002  // It's in range, so we can outline it.
5003  return MachineOutlinerInstrType::Legal;
5004  }
5005 
5006  // We can't fix it up, so don't outline it.
5007  return MachineOutlinerInstrType::Illegal;
5008  }
5009 
5010  return MachineOutlinerInstrType::Legal;
5011 }
5012 
5013 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5014  for (MachineInstr &MI : MBB) {
5015  unsigned Base, Width;
5016  int64_t Offset;
5017 
5018  // Is this a load or store with an immediate offset with SP as the base?
5019  if (!MI.mayLoadOrStore() ||
5020  !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
5021  Base != AArch64::SP)
5022  continue;
5023 
5024  // It is, so we have to fix it up.
5025  unsigned Scale;
5026  int64_t Dummy1, Dummy2;
5027 
5028  MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5029  assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5030  getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5031  assert(Scale != 0 && "Unexpected opcode!");
5032 
5033  // We've pushed the return address to the stack, so add 16 to the offset.
5034  // This is safe, since we already checked if it would overflow when we
5035  // checked if this instruction was legal to outline.
5036  int64_t NewImm = (Offset + 16) / Scale;
5037  StackOffsetOperand.setImm(NewImm);
5038  }
5039 }
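// (Editorial worked example, not part of the original file.) The rescaling
// above in concrete numbers: an outlined "ldr x0, [sp, #24]" has Scale == 8
// and an encoded immediate of 3. The outlined function spills LR with
// "str x30, [sp, #-16]!", so every SP-relative slot sits 16 bytes further
// away and the new encoded immediate becomes (24 + 16) / 8 == 5, i.e.
// "ldr x0, [sp, #40]".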
5040 
5041 void AArch64InstrInfo::insertOutlinerEpilogue(
5042  MachineBasicBlock &MBB, MachineFunction &MF,
5043  const MachineOutlinerInfo &MInfo) const {
5044 
5045  // Is there a call in the outlined range?
5046  if (std::any_of(MBB.instr_begin(), MBB.instr_end(),
5047  [](MachineInstr &MI) { return MI.isCall(); })) {
5048  // Fix up the instructions in the range, since we're going to modify the
5049  // stack.
5050  fixupPostOutline(MBB);
5051 
5052  // LR has to be a live in so that we can save it.
5053  MBB.addLiveIn(AArch64::LR);
5054 
5055  MachineBasicBlock::iterator It = MBB.begin();
5056  MachineBasicBlock::iterator Et = MBB.end();
5057 
5058  if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
5059  Et = std::prev(MBB.end());
5060 
5061  // Insert a save before the outlined region
5062  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5063  .addReg(AArch64::SP, RegState::Define)
5064  .addReg(AArch64::LR)
5065  .addReg(AArch64::SP)
5066  .addImm(-16);
5067  It = MBB.insert(It, STRXpre);
5068 
5069  // Insert a restore before the terminator for the function.
5070  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5071  .addReg(AArch64::SP, RegState::Define)
5072  .addReg(AArch64::LR, RegState::Define)
5073  .addReg(AArch64::SP)
5074  .addImm(16);
5075  Et = MBB.insert(Et, LDRXpost);
5076  }
5077 
5078  // If this is a tail call outlined function, then there's already a return.
5079  if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
5080  return;
5081 
5082  // It's not a tail call, so we have to insert the return ourselves.
5083  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5084  .addReg(AArch64::LR, RegState::Undef);
5085  MBB.insert(MBB.end(), ret);
5086 
5087  // Did we have to modify the stack by saving the link register?
5088  if (MInfo.FrameConstructionID == MachineOutlinerNoLRSave)
5089  return;
5090 
5091  // We modified the stack.
5092  // Walk over the basic block and fix up all the stack accesses.
5093  fixupPostOutline(MBB);
5094 }
5095 
5096 void AArch64InstrInfo::insertOutlinerPrologue(
5097  MachineBasicBlock &MBB, MachineFunction &MF,
5098  const MachineOutlinerInfo &MInfo) const {}
5099 
5100 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5101  Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
5102  MachineFunction &MF, const MachineOutlinerInfo &MInfo) const {
5103 
5104  // Are we tail calling?
5105  if (MInfo.CallConstructionID == MachineOutlinerTailCall) {
5106  // If yes, then we can just branch to the label.
5107  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B))
5108  .addGlobalAddress(M.getNamedValue(MF.getName())));
5109  return It;
5110  }
5111 
5112  // Are we saving the link register?
5113  if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) {
5114  // No, so just insert the call.
5115  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5116  .addGlobalAddress(M.getNamedValue(MF.getName())));
5117  return It;
5118  }
5119 
5120  // We have a default call. Save the link register.
5121  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5122  .addReg(AArch64::SP, RegState::Define)
5123  .addReg(AArch64::LR)
5124  .addReg(AArch64::SP)
5125  .addImm(-16);
5126  It = MBB.insert(It, STRXpre);
5127  It++;
5128 
5129  // Insert the call.
5130  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5131  .addGlobalAddress(M.getNamedValue(MF.getName())));
5132 
5133  It++;
5134 
5135  // Restore the link register.
5136  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5137  .addReg(AArch64::SP, RegState::Define)
5138  .addReg(AArch64::LR, RegState::Define)
5139  .addReg(AArch64::SP)
5140  .addImm(16);
5141  It = MBB.insert(It, LDRXpost);
5142 
5143  return It;
5144 }
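// (Editorial illustration, not part of the original file.) For the default
// case above, the sequence inserted around each call site corresponds to
// roughly the following assembly, where the callee name comes from
// MF.getName():
//   str x30, [sp, #-16]!    // STRXpre: spill LR and pre-decrement SP by 16
//   bl  <outlined function> // BL to the outlined body
//   ldr x30, [sp], #16      // LDRXpost: reload LR and post-increment SP by 16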