AArch64InstrInfo.cpp (LLVM 4.0.0)
//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64Subtarget.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/StackMaps.h"
30 #include "llvm/IR/DebugLoc.h"
31 #include "llvm/IR/GlobalValue.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/CodeGen.h"
37 #include "llvm/Support/Compiler.h"
44 #include <cassert>
45 #include <cstdint>
46 #include <iterator>
47 #include <utility>
48 
49 using namespace llvm;
50 
51 #define GET_INSTRINFO_CTOR_DTOR
52 #include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned>
TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
                    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
                    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                    cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may occupy. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  //        before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  }

  return NumBytes;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}
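
// Illustrative note (not from the original source): parseCondBranch encodes
// the branch condition into Cond in one of three shapes, which the rest of
// this file relies on:
//   b.lt %bb          -> Cond = { lt }                 (size 1)
//   cbz  w0, %bb      -> Cond = { -1, CBZW, w0 }       (size 3)
//   tbz  w0, #3, %bb  -> Cond = { -1, TBZW, w0, 3 }    (size 4)
// The leading -1 marks a folded compare-and-branch, which is why
// reverseBranchCondition and instantiateCondBranch below test
// Cond[0].getImm() != -1.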

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}
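
// Worked example (illustrative): with the default TBZDisplacementBits of 14,
// isBranchOffsetInRange(AArch64::TBZW, BrOffset) checks isIntN(14, BrOffset/4),
// so byte offsets from -2^13 * 4 = -32768 up to (2^13 - 1) * 4 = 32764 are in
// range, matching TB[N]Z's 14-bit signed, instruction-granule displacement.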

MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
    const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now that the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}
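
// Example (illustrative): reversing { lt } yields { ge }, while reversing a
// folded branch such as { -1, CBZW, w0 } flips only the opcode immediate,
// giving { -1, CBNZW, w0 }; the operands carrying the register (and the bit
// number for TB[N]Z) are left untouched.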

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL,
                                        int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
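
// Example (illustrative): given
//   %vreg1 = ADDWri %vreg0, 1, 0        ; %vreg1 = %vreg0 + 1
// canFoldIntoCSel returns CSINCWr with *NewVReg = %vreg0, so a select
//   csel  wDst, %vreg1, wF, cc          ; cc ? %vreg0 + 1 : wF
// can instead be emitted as
//   csinc wDst, wF, %vreg0, !cc         ; !cc ? wF : %vreg0 + 1
// after insertSelect (below) inverts the condition and swaps the operands.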

bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}
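
// Example (illustrative): selecting between w1 and w2 on a Cond of
// { -1, CBZW, w0 } materializes the compare and the select as
//   subs wzr, w0, #0        ; cmp w0, #0
//   csel w3, w1, w2, eq
// while a TB[N]Z condition instead materializes an ands against 1 << bit.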

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}
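
// Example (illustrative): MOVi32imm #0x00ff00ff can be expanded to
// "orr w0, wzr, #0x00ff00ff" because 0x00ff00ff is a valid logical
// immediate (a repeating 16-bit element with a run of 8 set bits), whereas
// #0x12345678 is not encodable and would need a movz/movk sequence instead.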

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  unsigned Imm;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
            MI.getOperand(3).getImm() == 0);

  // add/sub on register with shift
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getArithShiftValue(Imm) < 4);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // logical ops on register with shift
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getShiftValue(Imm) < 4 &&
            AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
  // feature.
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}
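
// Worked example (illustrative): for
//   ldr x1, [x0, #8]    ; BaseReg = x0, Offset = 8,  Width = 8
//   str x2, [x0, #16]   ; BaseReg = x0, Offset = 16, Width = 8
// the lower access ends at 8 + 8 = 16 <= 16, so the two accesses are
// trivially disjoint; with the store at [x0, #12] they would overlap and
// the function would conservatively return false.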

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return val type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk
    // CmpValue is only used to compare with zero in OptimizeCompareInstr
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
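
// Example (illustrative): "subs w0, w1, #5" whose NZCV def is dead maps to
// SUBWri ("sub w0, w1, #5"), dropping the flag write; but "subs wzr, w1, #5"
// (a plain cmp) keeps its S-form opcode, because in the non-flag-setting
// encoding the zero-register destination would become wsp.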

enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = 0x11
};

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can truly be a compare
/// instruction only when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction. Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare
  // function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr already is an S version, its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:    return AArch64::ADDSWrr;
  case AArch64::ADDWri:    return AArch64::ADDSWri;
  case AArch64::ADDXrr:    return AArch64::ADDSXrr;
  case AArch64::ADDXri:    return AArch64::ADDSXri;
  case AArch64::ADCWr:     return AArch64::ADCSWr;
  case AArch64::ADCXr:     return AArch64::ADCSXr;
  case AArch64::SUBWrr:    return AArch64::SUBSWrr;
  case AArch64::SUBWri:    return AArch64::SUBSWri;
  case AArch64::SUBXrr:    return AArch64::SUBSXrr;
  case AArch64::SUBXri:    return AArch64::SUBSXri;
  case AArch64::SBCWr:     return AArch64::SBCSWr;
  case AArch64::SBCXr:     return AArch64::SBCSXr;
  case AArch64::ANDWri:    return AArch64::ANDSWri;
  case AArch64::ANDXri:    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set   or  C clear
    UsedFlags.Z = true;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set,   N and V differ
    UsedFlags.Z = true;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}
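
// Example (illustrative): getUsedNZCV(AArch64CC::GT) reports Z, N and V as
// used (note the deliberate fallthroughs above: GT/LE add Z before falling
// into the GE/LT case that adds N and V), while getUsedNZCV(AArch64CC::NE)
// reports just Z. This is what lets canInstrSubstituteCmpInstr below accept
// substitutions for b.ne but reject them when C or V is consumed.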

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///        or if MI opcode is not the S form there must be neither defs of
///        flags nor uses of flags between MI and CmpInstr
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}
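
// Example (illustrative transformation):
//   %vreg2 = SUBWrr %vreg0, %vreg1
//   SUBSWri %vreg2, 0, 0        ; cmp %vreg2, #0
//   b.ne ...
// becomes
//   %vreg2 = SUBSWrr %vreg0, %vreg1
//   b.ne ...
// which is legal because NE only reads Z and neither C nor V is consumed
// after the removed compare (see canInstrSubstituteCmpInstr above).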

bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}
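
// Example (illustrative): for a GOT-referenced guard global such as
// __stack_chk_guard, the LOAD_STACK_GUARD pseudo above becomes roughly
//   ldr x0, :got:__stack_chk_guard   ; LOADgot pseudo (adrp + ldr)
//   ldr x0, [x0]                     ; load the guard value itself
// while the large code model instead builds the address 16 bits at a time
// with a movz/movk sequence before the final load.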

/// Return true if this instruction's shifted-register operand has a non-zero
/// shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction's extended-register operand has a non-zero
/// extend/shift amount.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI.getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}

bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}

bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
  return isUnscaledLdSt(MI.getOpcode());
}

// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI.getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
  unsigned Width;
  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Offset is calculated as the immediate operand multiplied by the scaling
  // factor. Unscaled instructions have scaling factor set to 1.
  unsigned Scale = 0;
  switch (LdSt.getOpcode()) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = 16;
    Width = 32;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = 8;
    Width = 16;
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = Width = 8;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = 4;
    Width = 8;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = Width = 4;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    break;
  }

  if (LdSt.getNumExplicitOperands() == 3) {
    BaseReg = LdSt.getOperand(1).getReg();
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseReg = LdSt.getOperand(2).getReg();
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }
  return true;
}
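
// Worked example (illustrative): for "ldp x1, x2, [x0, #16]" (LDPXi) the
// scale is 8 and the width 16, so with an immediate operand of 2 this
// reports BaseReg = x0, Offset = 2 * 8 = 16, and 16 bytes of memory covered.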

// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  unsigned OffsetStride = 1;
  switch (Opc) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    OffsetStride = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    OffsetStride = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    OffsetStride = 4;
    break;
  }
  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
  if (Offset % OffsetStride != 0)
    return false;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  Offset /= OffsetStride;
  return true;
}
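
// Example (illustrative): scaleOffset(AArch64::STURXi, Offset) with a byte
// offset of 16 yields an element offset of 2 (stride 8); a byte offset of
// 12 is rejected because it is not a multiple of the 8-byte stride.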

static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
  if (FirstOpc == SecondOpc)
    return true;
  // We can also pair sign-ext and zero-ext instructions.
  switch (FirstOpc) {
  default:
    return false;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
  }
  // These instructions can't be paired based on their opcodes.
  return false;
}
1862 
1863 /// Detect opportunities for ldp/stp formation.
1864 ///
1865 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
1866 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
1867  MachineInstr &SecondLdSt,
1868  unsigned NumLoads) const {
1869  // Only cluster up to a single pair.
1870  if (NumLoads > 1)
1871  return false;
1872 
1873  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
1874  return false;
1875 
1876  // Can we pair these instructions based on their opcodes?
1877  unsigned FirstOpc = FirstLdSt.getOpcode();
1878  unsigned SecondOpc = SecondLdSt.getOpcode();
1879  if (!canPairLdStOpc(FirstOpc, SecondOpc))
1880  return false;
1881 
1882  // Can't merge volatiles or load/stores that have a hint to avoid pair
1883  // formation, for example.
1884  if (!isCandidateToMergeOrPair(FirstLdSt) ||
1885  !isCandidateToMergeOrPair(SecondLdSt))
1886  return false;
1887 
1888  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
1889  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
1890  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
1891  return false;
1892 
1893  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
1894  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
1895  return false;
1896 
1897  // Pairwise instructions have a 7-bit signed offset field.
1898  if (Offset1 > 63 || Offset1 < -64)
1899  return false;
1900 
1901  // The caller should already have ordered First/SecondLdSt by offset.
1902  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
1903  return Offset1 + 1 == Offset2;
1904 }
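// Editorial example: two scaled loads "LDRXui [x0, #2]" and "LDRXui [x0, #3]"
// carry element offsets 2 and 3, so Offset1 + 1 == Offset2 holds and they are
// reported as clustering candidates (a later pass may merge them into an
// LDP); element offsets 2 and 4 would be rejected.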
1905 
1906 bool AArch64InstrInfo::shouldScheduleAdjacent(
1907  const MachineInstr &First, const MachineInstr &Second) const {
1908  if (Subtarget.hasArithmeticBccFusion()) {
1909  // Fuse CMN, CMP, TST followed by Bcc.
1910  unsigned SecondOpcode = Second.getOpcode();
1911  if (SecondOpcode == AArch64::Bcc) {
1912  switch (First.getOpcode()) {
1913  default:
1914  return false;
1915  case AArch64::ADDSWri:
1916  case AArch64::ADDSWrr:
1917  case AArch64::ADDSXri:
1918  case AArch64::ADDSXrr:
1919  case AArch64::ANDSWri:
1920  case AArch64::ANDSWrr:
1921  case AArch64::ANDSXri:
1922  case AArch64::ANDSXrr:
1923  case AArch64::SUBSWri:
1924  case AArch64::SUBSWrr:
1925  case AArch64::SUBSXri:
1926  case AArch64::SUBSXrr:
1927  case AArch64::BICSWrr:
1928  case AArch64::BICSXrr:
1929  return true;
1930  case AArch64::ADDSWrs:
1931  case AArch64::ADDSXrs:
1932  case AArch64::ANDSWrs:
1933  case AArch64::ANDSXrs:
1934  case AArch64::SUBSWrs:
1935  case AArch64::SUBSXrs:
1936  case AArch64::BICSWrs:
1937  case AArch64::BICSXrs:
1938  // Shift value can be 0 making these behave like the "rr" variant...
1939  return !hasShiftedReg(Second);
1940  }
1941  }
1942  }
1943  if (Subtarget.hasArithmeticCbzFusion()) {
1944  // Fuse ALU operations followed by CBZ/CBNZ.
1945  unsigned SecondOpcode = Second.getOpcode();
1946  if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
1947  SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
1948  switch (First.getOpcode()) {
1949  default:
1950  return false;
1951  case AArch64::ADDWri:
1952  case AArch64::ADDWrr:
1953  case AArch64::ADDXri:
1954  case AArch64::ADDXrr:
1955  case AArch64::ANDWri:
1956  case AArch64::ANDWrr:
1957  case AArch64::ANDXri:
1958  case AArch64::ANDXrr:
1959  case AArch64::EORWri:
1960  case AArch64::EORWrr:
1961  case AArch64::EORXri:
1962  case AArch64::EORXrr:
1963  case AArch64::ORRWri:
1964  case AArch64::ORRWrr:
1965  case AArch64::ORRXri:
1966  case AArch64::ORRXrr:
1967  case AArch64::SUBWri:
1968  case AArch64::SUBWrr:
1969  case AArch64::SUBXri:
1970  case AArch64::SUBXrr:
1971  return true;
1972  case AArch64::ADDWrs:
1973  case AArch64::ADDXrs:
1974  case AArch64::ANDWrs:
1975  case AArch64::ANDXrs:
1976  case AArch64::SUBWrs:
1977  case AArch64::SUBXrs:
1978  case AArch64::BICWrs:
1979  case AArch64::BICXrs:
1980  // Shift value can be 0 making these behave like the "rr" variant...
1981  return !hasShiftedReg(Second);
1982  }
1983  }
1984  }
1985  return false;
1986 }
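// Editorial sketch (assembly, not from the original file): with arithmetic/
// Bcc fusion available, a flag-setting compare and its conditional branch are
// kept adjacent so the core can fuse them, e.g.:
//
//   subs wzr, w0, #1   // SUBSWri (CMP alias) sets NZCV
//   b.eq .LBB0_2       // Bcc scheduled adjacent for macro-fusion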
1987 
1988 MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
1989  MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
1990  const MDNode *Expr, const DebugLoc &DL) const {
1991  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
1992  .addFrameIndex(FrameIx)
1993  .addImm(0)
1994  .addImm(Offset)
1995  .addMetadata(Var)
1996  .addMetadata(Expr);
1997  return &*MIB;
1998 }
1999 
2000 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2001  unsigned Reg, unsigned SubIdx,
2002  unsigned State,
2003  const TargetRegisterInfo *TRI) {
2004  if (!SubIdx)
2005  return MIB.addReg(Reg, State);
2006 
2007  if (TargetRegisterInfo::isPhysicalRegister(Reg))
2008  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2009  return MIB.addReg(Reg, State, SubIdx);
2010 }
2011 
2012 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2013  unsigned NumRegs) {
2014  // We really want the positive remainder mod 32 here, that happens to be
2015  // easily obtainable with a mask.
2016  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2017 }
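// Editorial example: for a 3-register D-tuple copy with DestReg encoding 1
// and SrcReg encoding 31, (1 - 31) & 0x1f == 2 < 3, so copying forwards
// (d1 <- d31, d2 <- d0, d3 <- d1) would clobber d1 before it is read as a
// source; copyPhysRegTuple below therefore iterates in reverse in that case.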
2018 
2019 void AArch64InstrInfo::copyPhysRegTuple(
2020  MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
2021  unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
2022  ArrayRef<unsigned> Indices) const {
2023  assert(Subtarget.hasNEON() &&
2024  "Unexpected register copy without NEON");
2025  const TargetRegisterInfo *TRI = &getRegisterInfo();
2026  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2027  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2028  unsigned NumRegs = Indices.size();
2029 
2030  int SubReg = 0, End = NumRegs, Incr = 1;
2031  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2032  SubReg = NumRegs - 1;
2033  End = -1;
2034  Incr = -1;
2035  }
2036 
2037  for (; SubReg != End; SubReg += Incr) {
2038  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2039  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2040  AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2041  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2042  }
2043 }
2044 
2045 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2046  MachineBasicBlock::iterator I,
2047  const DebugLoc &DL, unsigned DestReg,
2048  unsigned SrcReg, bool KillSrc) const {
2049  if (AArch64::GPR32spRegClass.contains(DestReg) &&
2050  (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2051  const TargetRegisterInfo *TRI = &getRegisterInfo();
2052 
2053  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2054  // If either operand is WSP, expand to ADD #0.
2055  if (Subtarget.hasZeroCycleRegMove()) {
2056  // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2057  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2058  &AArch64::GPR64spRegClass);
2059  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2060  &AArch64::GPR64spRegClass);
2061  // This instruction is reading and writing X registers. This may upset
2062  // the register scavenger and machine verifier, so we need to indicate
2063  // that we are reading an undefined value from SrcRegX, but a proper
2064  // value from SrcReg.
2065  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2066  .addReg(SrcRegX, RegState::Undef)
2067  .addImm(0)
2068  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2069  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2070  } else {
2071  BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2072  .addReg(SrcReg, getKillRegState(KillSrc))
2073  .addImm(0)
2074  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2075  }
2076  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
2077  BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
2078  AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2079  } else {
2080  if (Subtarget.hasZeroCycleRegMove()) {
2081  // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2082  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2083  &AArch64::GPR64spRegClass);
2084  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2085  &AArch64::GPR64spRegClass);
2086  // This instruction is reading and writing X registers. This may upset
2087  // the register scavenger and machine verifier, so we need to indicate
2088  // that we are reading an undefined value from SrcRegX, but a proper
2089  // value from SrcReg.
2090  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2091  .addReg(AArch64::XZR)
2092  .addReg(SrcRegX, RegState::Undef)
2093  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2094  } else {
2095  // Otherwise, expand to ORR WZR.
2096  BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2097  .addReg(AArch64::WZR)
2098  .addReg(SrcReg, getKillRegState(KillSrc));
2099  }
2100  }
2101  return;
2102  }
2103 
2104  if (AArch64::GPR64spRegClass.contains(DestReg) &&
2105  (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2106  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2107  // If either operand is SP, expand to ADD #0.
2108  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2109  .addReg(SrcReg, getKillRegState(KillSrc))
2110  .addImm(0)
2111  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2112  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
2113  BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
2114  AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2115  } else {
2116  // Otherwise, expand to ORR XZR.
2117  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2118  .addReg(AArch64::XZR)
2119  .addReg(SrcReg, getKillRegState(KillSrc));
2120  }
2121  return;
2122  }
2123 
2124  // Copy a DDDD register quad by copying the individual sub-registers.
2125  if (AArch64::DDDDRegClass.contains(DestReg) &&
2126  AArch64::DDDDRegClass.contains(SrcReg)) {
2127  static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
2128  AArch64::dsub2, AArch64::dsub3 };
2129  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2130  Indices);
2131  return;
2132  }
2133 
2134  // Copy a DDD register triple by copying the individual sub-registers.
2135  if (AArch64::DDDRegClass.contains(DestReg) &&
2136  AArch64::DDDRegClass.contains(SrcReg)) {
2137  static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
2138  AArch64::dsub2 };
2139  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2140  Indices);
2141  return;
2142  }
2143 
2144  // Copy a DD register pair by copying the individual sub-registers.
2145  if (AArch64::DDRegClass.contains(DestReg) &&
2146  AArch64::DDRegClass.contains(SrcReg)) {
2147  static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
2148  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2149  Indices);
2150  return;
2151  }
2152 
2153  // Copy a QQQQ register quad by copying the individual sub-registers.
2154  if (AArch64::QQQQRegClass.contains(DestReg) &&
2155  AArch64::QQQQRegClass.contains(SrcReg)) {
2156  static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
2157  AArch64::qsub2, AArch64::qsub3 };
2158  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2159  Indices);
2160  return;
2161  }
2162 
2163  // Copy a QQQ register triple by copying the individual sub-registers.
2164  if (AArch64::QQQRegClass.contains(DestReg) &&
2165  AArch64::QQQRegClass.contains(SrcReg)) {
2166  static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
2167  AArch64::qsub2 };
2168  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2169  Indices);
2170  return;
2171  }
2172 
2173  // Copy a QQ register pair by copying the individual sub-registers.
2174  if (AArch64::QQRegClass.contains(DestReg) &&
2175  AArch64::QQRegClass.contains(SrcReg)) {
2176  static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
2177  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2178  Indices);
2179  return;
2180  }
2181 
2182  if (AArch64::FPR128RegClass.contains(DestReg) &&
2183  AArch64::FPR128RegClass.contains(SrcReg)) {
2184  if (Subtarget.hasNEON()) {
2185  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2186  .addReg(SrcReg)
2187  .addReg(SrcReg, getKillRegState(KillSrc));
2188  } else {
2189  BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2190  .addReg(AArch64::SP, RegState::Define)
2191  .addReg(SrcReg, getKillRegState(KillSrc))
2192  .addReg(AArch64::SP)
2193  .addImm(-16);
2194  BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2195  .addReg(AArch64::SP, RegState::Define)
2196  .addReg(DestReg, RegState::Define)
2197  .addReg(AArch64::SP)
2198  .addImm(16);
2199  }
2200  return;
2201  }
2202 
2203  if (AArch64::FPR64RegClass.contains(DestReg) &&
2204  AArch64::FPR64RegClass.contains(SrcReg)) {
2205  if (Subtarget.hasNEON()) {
2206  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2207  &AArch64::FPR128RegClass);
2208  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2209  &AArch64::FPR128RegClass);
2210  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2211  .addReg(SrcReg)
2212  .addReg(SrcReg, getKillRegState(KillSrc));
2213  } else {
2214  BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2215  .addReg(SrcReg, getKillRegState(KillSrc));
2216  }
2217  return;
2218  }
2219 
2220  if (AArch64::FPR32RegClass.contains(DestReg) &&
2221  AArch64::FPR32RegClass.contains(SrcReg)) {
2222  if (Subtarget.hasNEON()) {
2223  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2224  &AArch64::FPR128RegClass);
2225  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2226  &AArch64::FPR128RegClass);
2227  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2228  .addReg(SrcReg)
2229  .addReg(SrcReg, getKillRegState(KillSrc));
2230  } else {
2231  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2232  .addReg(SrcReg, getKillRegState(KillSrc));
2233  }
2234  return;
2235  }
2236 
2237  if (AArch64::FPR16RegClass.contains(DestReg) &&
2238  AArch64::FPR16RegClass.contains(SrcReg)) {
2239  if (Subtarget.hasNEON()) {
2240  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2241  &AArch64::FPR128RegClass);
2242  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2243  &AArch64::FPR128RegClass);
2244  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2245  .addReg(SrcReg)
2246  .addReg(SrcReg, getKillRegState(KillSrc));
2247  } else {
2248  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2249  &AArch64::FPR32RegClass);
2250  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2251  &AArch64::FPR32RegClass);
2252  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2253  .addReg(SrcReg, getKillRegState(KillSrc));
2254  }
2255  return;
2256  }
2257 
2258  if (AArch64::FPR8RegClass.contains(DestReg) &&
2259  AArch64::FPR8RegClass.contains(SrcReg)) {
2260  if (Subtarget.hasNEON()) {
2261  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2262  &AArch64::FPR128RegClass);
2263  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2264  &AArch64::FPR128RegClass);
2265  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2266  .addReg(SrcReg)
2267  .addReg(SrcReg, getKillRegState(KillSrc));
2268  } else {
2269  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2270  &AArch64::FPR32RegClass);
2271  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2272  &AArch64::FPR32RegClass);
2273  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2274  .addReg(SrcReg, getKillRegState(KillSrc));
2275  }
2276  return;
2277  }
2278 
2279  // Copies between GPR64 and FPR64.
2280  if (AArch64::FPR64RegClass.contains(DestReg) &&
2281  AArch64::GPR64RegClass.contains(SrcReg)) {
2282  BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2283  .addReg(SrcReg, getKillRegState(KillSrc));
2284  return;
2285  }
2286  if (AArch64::GPR64RegClass.contains(DestReg) &&
2287  AArch64::FPR64RegClass.contains(SrcReg)) {
2288  BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2289  .addReg(SrcReg, getKillRegState(KillSrc));
2290  return;
2291  }
2292  // Copies between GPR32 and FPR32.
2293  if (AArch64::FPR32RegClass.contains(DestReg) &&
2294  AArch64::GPR32RegClass.contains(SrcReg)) {
2295  BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2296  .addReg(SrcReg, getKillRegState(KillSrc));
2297  return;
2298  }
2299  if (AArch64::GPR32RegClass.contains(DestReg) &&
2300  AArch64::FPR32RegClass.contains(SrcReg)) {
2301  BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2302  .addReg(SrcReg, getKillRegState(KillSrc));
2303  return;
2304  }
2305 
2306  if (DestReg == AArch64::NZCV) {
2307  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2308  BuildMI(MBB, I, DL, get(AArch64::MSR))
2309  .addImm(AArch64SysReg::NZCV)
2310  .addReg(SrcReg, getKillRegState(KillSrc))
2311  .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2312  return;
2313  }
2314 
2315  if (SrcReg == AArch64::NZCV) {
2316  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2317  BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2318  .addImm(AArch64SysReg::NZCV)
2319  .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2320  return;
2321  }
2322 
2323  llvm_unreachable("unimplemented reg-to-reg copy");
2324 }
2325 
2326 void AArch64InstrInfo::storeRegToStackSlot(
2327  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2328  bool isKill, int FI, const TargetRegisterClass *RC,
2329  const TargetRegisterInfo *TRI) const {
2330  DebugLoc DL;
2331  if (MBBI != MBB.end())
2332  DL = MBBI->getDebugLoc();
2333  MachineFunction &MF = *MBB.getParent();
2334  MachineFrameInfo &MFI = MF.getFrameInfo();
2335  unsigned Align = MFI.getObjectAlignment(FI);
2336 
2337  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2338  MachineMemOperand *MMO = MF.getMachineMemOperand(
2339  PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2340  unsigned Opc = 0;
2341  bool Offset = true;
2342  switch (RC->getSize()) {
2343  case 1:
2344  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2345  Opc = AArch64::STRBui;
2346  break;
2347  case 2:
2348  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2349  Opc = AArch64::STRHui;
2350  break;
2351  case 4:
2352  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2353  Opc = AArch64::STRWui;
2354  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2355  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2356  else
2357  assert(SrcReg != AArch64::WSP);
2358  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2359  Opc = AArch64::STRSui;
2360  break;
2361  case 8:
2362  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2363  Opc = AArch64::STRXui;
2364  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2365  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2366  else
2367  assert(SrcReg != AArch64::SP);
2368  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2369  Opc = AArch64::STRDui;
2370  break;
2371  case 16:
2372  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2373  Opc = AArch64::STRQui;
2374  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2375  assert(Subtarget.hasNEON() &&
2376  "Unexpected register store without NEON");
2377  Opc = AArch64::ST1Twov1d;
2378  Offset = false;
2379  }
2380  break;
2381  case 24:
2382  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2383  assert(Subtarget.hasNEON() &&
2384  "Unexpected register store without NEON");
2385  Opc = AArch64::ST1Threev1d;
2386  Offset = false;
2387  }
2388  break;
2389  case 32:
2390  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2391  assert(Subtarget.hasNEON() &&
2392  "Unexpected register store without NEON");
2393  Opc = AArch64::ST1Fourv1d;
2394  Offset = false;
2395  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2396  assert(Subtarget.hasNEON() &&
2397  "Unexpected register store without NEON");
2398  Opc = AArch64::ST1Twov2d;
2399  Offset = false;
2400  }
2401  break;
2402  case 48:
2403  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2404  assert(Subtarget.hasNEON() &&
2405  "Unexpected register store without NEON");
2406  Opc = AArch64::ST1Threev2d;
2407  Offset = false;
2408  }
2409  break;
2410  case 64:
2411  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2412  assert(Subtarget.hasNEON() &&
2413  "Unexpected register store without NEON");
2414  Opc = AArch64::ST1Fourv2d;
2415  Offset = false;
2416  }
2417  break;
2418  }
2419  assert(Opc && "Unknown register class");
2420 
2421  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2422  .addReg(SrcReg, getKillRegState(isKill))
2423  .addFrameIndex(FI);
2424 
2425  if (Offset)
2426  MI.addImm(0);
2427  MI.addMemOperand(MMO);
2428 }
2429 
2430 void AArch64InstrInfo::loadRegFromStackSlot(
2431  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2432  int FI, const TargetRegisterClass *RC,
2433  const TargetRegisterInfo *TRI) const {
2434  DebugLoc DL;
2435  if (MBBI != MBB.end())
2436  DL = MBBI->getDebugLoc();
2437  MachineFunction &MF = *MBB.getParent();
2438  MachineFrameInfo &MFI = MF.getFrameInfo();
2439  unsigned Align = MFI.getObjectAlignment(FI);
2440  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2441  MachineMemOperand *MMO = MF.getMachineMemOperand(
2442  PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2443 
2444  unsigned Opc = 0;
2445  bool Offset = true;
2446  switch (RC->getSize()) {
2447  case 1:
2448  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2449  Opc = AArch64::LDRBui;
2450  break;
2451  case 2:
2452  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2453  Opc = AArch64::LDRHui;
2454  break;
2455  case 4:
2456  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2457  Opc = AArch64::LDRWui;
2458  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2459  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2460  else
2461  assert(DestReg != AArch64::WSP);
2462  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2463  Opc = AArch64::LDRSui;
2464  break;
2465  case 8:
2466  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2467  Opc = AArch64::LDRXui;
2468  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2469  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2470  else
2471  assert(DestReg != AArch64::SP);
2472  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2473  Opc = AArch64::LDRDui;
2474  break;
2475  case 16:
2476  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2477  Opc = AArch64::LDRQui;
2478  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2479  assert(Subtarget.hasNEON() &&
2480  "Unexpected register load without NEON");
2481  Opc = AArch64::LD1Twov1d;
2482  Offset = false;
2483  }
2484  break;
2485  case 24:
2486  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2487  assert(Subtarget.hasNEON() &&
2488  "Unexpected register load without NEON");
2489  Opc = AArch64::LD1Threev1d;
2490  Offset = false;
2491  }
2492  break;
2493  case 32:
2494  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2495  assert(Subtarget.hasNEON() &&
2496  "Unexpected register load without NEON");
2497  Opc = AArch64::LD1Fourv1d;
2498  Offset = false;
2499  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2500  assert(Subtarget.hasNEON() &&
2501  "Unexpected register load without NEON");
2502  Opc = AArch64::LD1Twov2d;
2503  Offset = false;
2504  }
2505  break;
2506  case 48:
2507  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2508  assert(Subtarget.hasNEON() &&
2509  "Unexpected register load without NEON");
2510  Opc = AArch64::LD1Threev2d;
2511  Offset = false;
2512  }
2513  break;
2514  case 64:
2515  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2516  assert(Subtarget.hasNEON() &&
2517  "Unexpected register load without NEON");
2518  Opc = AArch64::LD1Fourv2d;
2519  Offset = false;
2520  }
2521  break;
2522  }
2523  assert(Opc && "Unknown register class");
2524 
2525  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2526  .addReg(DestReg, getDefRegState(true))
2527  .addFrameIndex(FI);
2528  if (Offset)
2529  MI.addImm(0);
2530  MI.addMemOperand(MMO);
2531 }
2532 
2533 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2534  MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2535  unsigned DestReg, unsigned SrcReg, int Offset,
2536  const TargetInstrInfo *TII,
2537  MachineInstr::MIFlag Flag, bool SetNZCV) {
2538  if (DestReg == SrcReg && Offset == 0)
2539  return;
2540 
2541  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2542  "SP increment/decrement not 16-byte aligned");
2543 
2544  bool isSub = Offset < 0;
2545  if (isSub)
2546  Offset = -Offset;
2547 
2548  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2549  // scratch register. If DestReg is a virtual register, use it as the
2550  // scratch register; otherwise, create a new virtual register (to be
2551  // replaced by the scavenger at the end of PEI). That case can be optimized
2552  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2553  // register can be loaded with offset%8 and the add/sub can use an extending
2554  // instruction with LSL#3.
2555  // Currently the function handles any offsets but generates a poor sequence
2556  // of code.
2557  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2558 
2559  unsigned Opc;
2560  if (SetNZCV)
2561  Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2562  else
2563  Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2564  const unsigned MaxEncoding = 0xfff;
2565  const unsigned ShiftSize = 12;
2566  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2567  while (((unsigned)Offset) >= (1 << ShiftSize)) {
2568  unsigned ThisVal;
2569  if (((unsigned)Offset) > MaxEncodableValue) {
2570  ThisVal = MaxEncodableValue;
2571  } else {
2572  ThisVal = Offset & MaxEncodableValue;
2573  }
2574  assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2575  "Encoding cannot handle value that big");
2576  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2577  .addReg(SrcReg)
2578  .addImm(ThisVal >> ShiftSize)
2579  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2580  .setMIFlag(Flag);
2581 
2582  SrcReg = DestReg;
2583  Offset -= ThisVal;
2584  if (Offset == 0)
2585  return;
2586  }
2587  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2588  .addReg(SrcReg)
2589  .addImm(Offset)
2590  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2591  .setMIFlag(Flag);
2592 }
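// Editorial example (hypothetical offset): a frame offset of 0x101008 from SP
// is larger than one 12-bit immediate can encode, so the loop above splits it
// into a shifted chunk plus a remainder, roughly:
//
//   add x0, sp, #0x101, lsl #12   // 0x101000
//   add x0, x0, #8                // remaining low bits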
2593 
2594 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2595  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
2596  MachineBasicBlock::iterator InsertPt, int FrameIndex,
2597  LiveIntervals *LIS) const {
2598  // This is a bit of a hack. Consider this instruction:
2599  //
2600  // %vreg0<def> = COPY %SP; GPR64all:%vreg0
2601  //
2602  // We explicitly chose GPR64all for the virtual register so such a copy might
2603  // be eliminated by RegisterCoalescer. However, that may not be possible, and
2604  // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
2605  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2606  //
2607  // To prevent that, we are going to constrain the %vreg0 register class here.
2608  //
2609  // <rdar://problem/11522048>
2610  //
2611  if (MI.isFullCopy()) {
2612  unsigned DstReg = MI.getOperand(0).getReg();
2613  unsigned SrcReg = MI.getOperand(1).getReg();
2614  if (SrcReg == AArch64::SP &&
2615  TargetRegisterInfo::isVirtualRegister(DstReg)) {
2616  MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2617  return nullptr;
2618  }
2619  if (DstReg == AArch64::SP &&
2620  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2621  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2622  return nullptr;
2623  }
2624  }
2625 
2626  // Handle the case where a copy is being spilled or filled but the source
2627  // and destination register class don't match. For example:
2628  //
2629  // %vreg0<def> = COPY %XZR; GPR64common:%vreg0
2630  //
2631  // In this case we can still safely fold away the COPY and generate the
2632  // following spill code:
2633  //
2634  // STRXui %XZR, <fi#0>
2635  //
2636  // This also eliminates spilled cross register class COPYs (e.g. between x and
2637  // d regs) of the same size. For example:
2638  //
2639  // %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
2640  //
2641  // will be filled as
2642  //
2643  // LDRDui %vreg0, fi<#0>
2644  //
2645  // instead of
2646  //
2647  // LDRXui %vregTemp, fi<#0>
2648  // %vreg0 = FMOV %vregTemp
2649  //
2650  if (MI.isCopy() && Ops.size() == 1 &&
2651  // Make sure we're only folding the explicit COPY defs/uses.
2652  (Ops[0] == 0 || Ops[0] == 1)) {
2653  bool IsSpill = Ops[0] == 0;
2654  bool IsFill = !IsSpill;
2655  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
2656  const MachineRegisterInfo &MRI = MF.getRegInfo();
2657  MachineBasicBlock &MBB = *MI.getParent();
2658  const MachineOperand &DstMO = MI.getOperand(0);
2659  const MachineOperand &SrcMO = MI.getOperand(1);
2660  unsigned DstReg = DstMO.getReg();
2661  unsigned SrcReg = SrcMO.getReg();
2662  // This is slightly expensive to compute for physical regs since
2663  // getMinimalPhysRegClass is slow.
2664  auto getRegClass = [&](unsigned Reg) {
2665  return TargetRegisterInfo::isVirtualRegister(Reg)
2666  ? MRI.getRegClass(Reg)
2667  : TRI.getMinimalPhysRegClass(Reg);
2668  };
2669 
2670  if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
2671  assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() &&
2672  "Mismatched register size in non subreg COPY");
2673  if (IsSpill)
2674  storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
2675  getRegClass(SrcReg), &TRI);
2676  else
2677  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
2678  getRegClass(DstReg), &TRI);
2679  return &*--InsertPt;
2680  }
2681 
2682  // Handle cases like spilling def of:
2683  //
2684  // %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
2685  //
2686  // where the physical register source can be widened and stored to the full
2687  // virtual reg destination stack slot, in this case producing:
2688  //
2689  // STRXui %XZR, <fi#0>
2690  //
2691  if (IsSpill && DstMO.isUndef() &&
2692  TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2693  assert(SrcMO.getSubReg() == 0 &&
2694  "Unexpected subreg on physical register");
2695  const TargetRegisterClass *SpillRC;
2696  unsigned SpillSubreg;
2697  switch (DstMO.getSubReg()) {
2698  default:
2699  SpillRC = nullptr;
2700  break;
2701  case AArch64::sub_32:
2702  case AArch64::ssub:
2703  if (AArch64::GPR32RegClass.contains(SrcReg)) {
2704  SpillRC = &AArch64::GPR64RegClass;
2705  SpillSubreg = AArch64::sub_32;
2706  } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
2707  SpillRC = &AArch64::FPR64RegClass;
2708  SpillSubreg = AArch64::ssub;
2709  } else
2710  SpillRC = nullptr;
2711  break;
2712  case AArch64::dsub:
2713  if (AArch64::FPR64RegClass.contains(SrcReg)) {
2714  SpillRC = &AArch64::FPR128RegClass;
2715  SpillSubreg = AArch64::dsub;
2716  } else
2717  SpillRC = nullptr;
2718  break;
2719  }
2720 
2721  if (SpillRC)
2722  if (unsigned WidenedSrcReg =
2723  TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
2724  storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
2725  FrameIndex, SpillRC, &TRI);
2726  return &*--InsertPt;
2727  }
2728  }
2729 
2730  // Handle cases like filling use of:
2731  //
2732  // %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
2733  //
2734  // where we can load the full virtual reg source stack slot, into the subreg
2735  // destination, in this case producing:
2736  //
2737  // LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
2738  //
2739  if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
2740  const TargetRegisterClass *FillRC;
2741  switch (DstMO.getSubReg()) {
2742  default:
2743  FillRC = nullptr;
2744  break;
2745  case AArch64::sub_32:
2746  FillRC = &AArch64::GPR32RegClass;
2747  break;
2748  case AArch64::ssub:
2749  FillRC = &AArch64::FPR32RegClass;
2750  break;
2751  case AArch64::dsub:
2752  FillRC = &AArch64::FPR64RegClass;
2753  break;
2754  }
2755 
2756  if (FillRC) {
2757  assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() &&
2758  "Mismatched regclass size on folded subreg COPY");
2759  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
2760  MachineInstr &LoadMI = *--InsertPt;
2761  MachineOperand &LoadDst = LoadMI.getOperand(0);
2762  assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
2763  LoadDst.setSubReg(DstMO.getSubReg());
2764  LoadDst.setIsUndef();
2765  return &LoadMI;
2766  }
2767  }
2768  }
2769 
2770  // Cannot fold.
2771  return nullptr;
2772 }
2773 
2774 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2775  bool *OutUseUnscaledOp,
2776  unsigned *OutUnscaledOp,
2777  int *EmittableOffset) {
2778  int Scale = 1;
2779  bool IsSigned = false;
2780  // The ImmIdx should be changed case by case if it is not 2.
2781  unsigned ImmIdx = 2;
2782  unsigned UnscaledOp = 0;
2783  // Set output values in case of early exit.
2784  if (EmittableOffset)
2785  *EmittableOffset = 0;
2786  if (OutUseUnscaledOp)
2787  *OutUseUnscaledOp = false;
2788  if (OutUnscaledOp)
2789  *OutUnscaledOp = 0;
2790  switch (MI.getOpcode()) {
2791  default:
2792  llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
2793  // Vector spills/fills can't take an immediate offset.
2794  case AArch64::LD1Twov2d:
2795  case AArch64::LD1Threev2d:
2796  case AArch64::LD1Fourv2d:
2797  case AArch64::LD1Twov1d:
2798  case AArch64::LD1Threev1d:
2799  case AArch64::LD1Fourv1d:
2800  case AArch64::ST1Twov2d:
2801  case AArch64::ST1Threev2d:
2802  case AArch64::ST1Fourv2d:
2803  case AArch64::ST1Twov1d:
2804  case AArch64::ST1Threev1d:
2805  case AArch64::ST1Fourv1d:
2806  return AArch64FrameOffsetCannotUpdate;
2807  case AArch64::PRFMui:
2808  Scale = 8;
2809  UnscaledOp = AArch64::PRFUMi;
2810  break;
2811  case AArch64::LDRXui:
2812  Scale = 8;
2813  UnscaledOp = AArch64::LDURXi;
2814  break;
2815  case AArch64::LDRWui:
2816  Scale = 4;
2817  UnscaledOp = AArch64::LDURWi;
2818  break;
2819  case AArch64::LDRBui:
2820  Scale = 1;
2821  UnscaledOp = AArch64::LDURBi;
2822  break;
2823  case AArch64::LDRHui:
2824  Scale = 2;
2825  UnscaledOp = AArch64::LDURHi;
2826  break;
2827  case AArch64::LDRSui:
2828  Scale = 4;
2829  UnscaledOp = AArch64::LDURSi;
2830  break;
2831  case AArch64::LDRDui:
2832  Scale = 8;
2833  UnscaledOp = AArch64::LDURDi;
2834  break;
2835  case AArch64::LDRQui:
2836  Scale = 16;
2837  UnscaledOp = AArch64::LDURQi;
2838  break;
2839  case AArch64::LDRBBui:
2840  Scale = 1;
2841  UnscaledOp = AArch64::LDURBBi;
2842  break;
2843  case AArch64::LDRHHui:
2844  Scale = 2;
2845  UnscaledOp = AArch64::LDURHHi;
2846  break;
2847  case AArch64::LDRSBXui:
2848  Scale = 1;
2849  UnscaledOp = AArch64::LDURSBXi;
2850  break;
2851  case AArch64::LDRSBWui:
2852  Scale = 1;
2853  UnscaledOp = AArch64::LDURSBWi;
2854  break;
2855  case AArch64::LDRSHXui:
2856  Scale = 2;
2857  UnscaledOp = AArch64::LDURSHXi;
2858  break;
2859  case AArch64::LDRSHWui:
2860  Scale = 2;
2861  UnscaledOp = AArch64::LDURSHWi;
2862  break;
2863  case AArch64::LDRSWui:
2864  Scale = 4;
2865  UnscaledOp = AArch64::LDURSWi;
2866  break;
2867 
2868  case AArch64::STRXui:
2869  Scale = 8;
2870  UnscaledOp = AArch64::STURXi;
2871  break;
2872  case AArch64::STRWui:
2873  Scale = 4;
2874  UnscaledOp = AArch64::STURWi;
2875  break;
2876  case AArch64::STRBui:
2877  Scale = 1;
2878  UnscaledOp = AArch64::STURBi;
2879  break;
2880  case AArch64::STRHui:
2881  Scale = 2;
2882  UnscaledOp = AArch64::STURHi;
2883  break;
2884  case AArch64::STRSui:
2885  Scale = 4;
2886  UnscaledOp = AArch64::STURSi;
2887  break;
2888  case AArch64::STRDui:
2889  Scale = 8;
2890  UnscaledOp = AArch64::STURDi;
2891  break;
2892  case AArch64::STRQui:
2893  Scale = 16;
2894  UnscaledOp = AArch64::STURQi;
2895  break;
2896  case AArch64::STRBBui:
2897  Scale = 1;
2898  UnscaledOp = AArch64::STURBBi;
2899  break;
2900  case AArch64::STRHHui:
2901  Scale = 2;
2902  UnscaledOp = AArch64::STURHHi;
2903  break;
2904 
2905  case AArch64::LDPXi:
2906  case AArch64::LDPDi:
2907  case AArch64::STPXi:
2908  case AArch64::STPDi:
2909  case AArch64::LDNPXi:
2910  case AArch64::LDNPDi:
2911  case AArch64::STNPXi:
2912  case AArch64::STNPDi:
2913  ImmIdx = 3;
2914  IsSigned = true;
2915  Scale = 8;
2916  break;
2917  case AArch64::LDPQi:
2918  case AArch64::STPQi:
2919  case AArch64::LDNPQi:
2920  case AArch64::STNPQi:
2921  ImmIdx = 3;
2922  IsSigned = true;
2923  Scale = 16;
2924  break;
2925  case AArch64::LDPWi:
2926  case AArch64::LDPSi:
2927  case AArch64::STPWi:
2928  case AArch64::STPSi:
2929  case AArch64::LDNPWi:
2930  case AArch64::LDNPSi:
2931  case AArch64::STNPWi:
2932  case AArch64::STNPSi:
2933  ImmIdx = 3;
2934  IsSigned = true;
2935  Scale = 4;
2936  break;
2937 
2938  case AArch64::LDURXi:
2939  case AArch64::LDURWi:
2940  case AArch64::LDURBi:
2941  case AArch64::LDURHi:
2942  case AArch64::LDURSi:
2943  case AArch64::LDURDi:
2944  case AArch64::LDURQi:
2945  case AArch64::LDURHHi:
2946  case AArch64::LDURBBi:
2947  case AArch64::LDURSBXi:
2948  case AArch64::LDURSBWi:
2949  case AArch64::LDURSHXi:
2950  case AArch64::LDURSHWi:
2951  case AArch64::LDURSWi:
2952  case AArch64::STURXi:
2953  case AArch64::STURWi:
2954  case AArch64::STURBi:
2955  case AArch64::STURHi:
2956  case AArch64::STURSi:
2957  case AArch64::STURDi:
2958  case AArch64::STURQi:
2959  case AArch64::STURBBi:
2960  case AArch64::STURHHi:
2961  Scale = 1;
2962  break;
2963  }
2964 
2965  Offset += MI.getOperand(ImmIdx).getImm() * Scale;
2966 
2967  bool useUnscaledOp = false;
2968  // If the offset doesn't match the scale, we rewrite the instruction to
2969  // use the unscaled instruction instead. Likewise, if we have a negative
2970  // offset (and have an unscaled op to use).
2971  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
2972  useUnscaledOp = true;
2973 
2974  // Use an unscaled addressing mode if the instruction has a negative offset
2975  // (or if the instruction is already using an unscaled addressing mode).
2976  unsigned MaskBits;
2977  if (IsSigned) {
2978  // ldp/stp instructions.
2979  MaskBits = 7;
2980  Offset /= Scale;
2981  } else if (UnscaledOp == 0 || useUnscaledOp) {
2982  MaskBits = 9;
2983  IsSigned = true;
2984  Scale = 1;
2985  } else {
2986  MaskBits = 12;
2987  IsSigned = false;
2988  Offset /= Scale;
2989  }
2990 
2991  // Attempt to fold address computation.
2992  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
2993  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
2994  if (Offset >= MinOff && Offset <= MaxOff) {
2995  if (EmittableOffset)
2996  *EmittableOffset = Offset;
2997  Offset = 0;
2998  } else {
2999  int NewOff = Offset < 0 ? MinOff : MaxOff;
3000  if (EmittableOffset)
3001  *EmittableOffset = NewOff;
3002  Offset = (Offset - NewOff) * Scale;
3003  }
3004  if (OutUseUnscaledOp)
3005  *OutUseUnscaledOp = useUnscaledOp;
3006  if (OutUnscaledOp)
3007  *OutUnscaledOp = UnscaledOp;
3008  return AArch64FrameOffsetCanUpdate |
3009  (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3010 }
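// Editorial example: for an LDRXui (Scale = 8, unsigned 12-bit immediate) a
// combined offset of 32760 scales to 32760 / 8 == 4095, which fits [0, 4095],
// so *EmittableOffset becomes 4095 and the remaining Offset is 0; a combined
// offset of 32768 would be split instead, emitting 4095 and returning the
// leftover 8 bytes in Offset for the caller to materialize separately.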
3011 
3012 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3013  unsigned FrameReg, int &Offset,
3014  const AArch64InstrInfo *TII) {
3015  unsigned Opcode = MI.getOpcode();
3016  unsigned ImmIdx = FrameRegIdx + 1;
3017 
3018  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3019  Offset += MI.getOperand(ImmIdx).getImm();
3020  emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3021  MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3022  MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3023  MI.eraseFromParent();
3024  Offset = 0;
3025  return true;
3026  }
3027 
3028  int NewOffset;
3029  unsigned UnscaledOp;
3030  bool UseUnscaledOp;
3031  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3032  &UnscaledOp, &NewOffset);
3033  if (Status & AArch64FrameOffsetCanUpdate) {
3034  if (Status & AArch64FrameOffsetIsLegal)
3035  // Replace the FrameIndex with FrameReg.
3036  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3037  if (UseUnscaledOp)
3038  MI.setDesc(TII->get(UnscaledOp));
3039 
3040  MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3041  return Offset == 0;
3042  }
3043 
3044  return false;
3045 }
3046 
3047 void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
3048  NopInst.setOpcode(AArch64::HINT);
3049  NopInst.addOperand(MCOperand::createImm(0));
3050 }
3051 
3052 // AArch64 supports MachineCombiner.
3053 bool AArch64InstrInfo::useMachineCombiner() const {
3054 
3055  return true;
3056 }
3057 
3058 // True when Opc sets the NZCV flags.
3059 static bool isCombineInstrSettingFlag(unsigned Opc) {
3060  switch (Opc) {
3061  case AArch64::ADDSWrr:
3062  case AArch64::ADDSWri:
3063  case AArch64::ADDSXrr:
3064  case AArch64::ADDSXri:
3065  case AArch64::SUBSWrr:
3066  case AArch64::SUBSXrr:
3067  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3068  case AArch64::SUBSWri:
3069  case AArch64::SUBSXri:
3070  return true;
3071  default:
3072  break;
3073  }
3074  return false;
3075 }
3076 
3077 // 32b Opcodes that can be combined with a MUL
3078 static bool isCombineInstrCandidate32(unsigned Opc) {
3079  switch (Opc) {
3080  case AArch64::ADDWrr:
3081  case AArch64::ADDWri:
3082  case AArch64::SUBWrr:
3083  case AArch64::ADDSWrr:
3084  case AArch64::ADDSWri:
3085  case AArch64::SUBSWrr:
3086  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3087  case AArch64::SUBWri:
3088  case AArch64::SUBSWri:
3089  return true;
3090  default:
3091  break;
3092  }
3093  return false;
3094 }
3095 
3096 // 64b Opcodes that can be combined with a MUL
3097 static bool isCombineInstrCandidate64(unsigned Opc) {
3098  switch (Opc) {
3099  case AArch64::ADDXrr:
3100  case AArch64::ADDXri:
3101  case AArch64::SUBXrr:
3102  case AArch64::ADDSXrr:
3103  case AArch64::ADDSXri:
3104  case AArch64::SUBSXrr:
3105  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3106  case AArch64::SUBXri:
3107  case AArch64::SUBSXri:
3108  return true;
3109  default:
3110  break;
3111  }
3112  return false;
3113 }
3114 
3115 // FP Opcodes that can be combined with a FMUL
3116 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3117  switch (Inst.getOpcode()) {
3118  default:
3119  break;
3120  case AArch64::FADDSrr:
3121  case AArch64::FADDDrr:
3122  case AArch64::FADDv2f32:
3123  case AArch64::FADDv2f64:
3124  case AArch64::FADDv4f32:
3125  case AArch64::FSUBSrr:
3126  case AArch64::FSUBDrr:
3127  case AArch64::FSUBv2f32:
3128  case AArch64::FSUBv2f64:
3129  case AArch64::FSUBv4f32:
3130  TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3131  return (Options.UnsafeFPMath ||
3132  Options.AllowFPOpFusion == FPOpFusion::Fast);
3133  }
3134  return false;
3135 }
3136 
3137 // Opcodes that can be combined with a MUL
3138 static bool isCombineInstrCandidate(unsigned Opc) {
3139  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3140 }
3141 
3142 //
3143 // Utility routine that checks if \p MO is defined by a
3144 // \p CombineOpc instruction in the basic block \p MBB.
3145 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3146  unsigned CombineOpc, unsigned ZeroReg = 0,
3147  bool CheckZeroReg = false) {
3148  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3149  MachineInstr *MI = nullptr;
3150 
3151  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3152  MI = MRI.getUniqueVRegDef(MO.getReg());
3153  // And it needs to be in the trace (otherwise, it won't have a depth).
3154  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3155  return false;
3156  // Must only be used by the user we combine with.
3157  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3158  return false;
3159 
3160  if (CheckZeroReg) {
3161  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3162  MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3163  MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
3164  // The third input reg must be zero.
3165  if (MI->getOperand(3).getReg() != ZeroReg)
3166  return false;
3167  }
3168 
3169  return true;
3170 }
3171 
3172 //
3173 // Is \param MO defined by an integer multiply and can be combined?
3174 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3175  unsigned MulOpc, unsigned ZeroReg) {
3176  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3177 }
3178 
3179 //
3180 // Is \param MO defined by a floating-point multiply and can be combined?
3181 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3182  unsigned MulOpc) {
3183  return canCombine(MBB, MO, MulOpc);
3184 }
3185 
3186 // TODO: There are many more machine instruction opcodes to match:
3187 // 1. Other data types (integer, vectors)
3188 // 2. Other math / logic operations (xor, or)
3189 // 3. Other forms of the same operation (intrinsics and other variants)
3190 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
3191  switch (Inst.getOpcode()) {
3192  case AArch64::FADDDrr:
3193  case AArch64::FADDSrr:
3194  case AArch64::FADDv2f32:
3195  case AArch64::FADDv2f64:
3196  case AArch64::FADDv4f32:
3197  case AArch64::FMULDrr:
3198  case AArch64::FMULSrr:
3199  case AArch64::FMULX32:
3200  case AArch64::FMULX64:
3201  case AArch64::FMULXv2f32:
3202  case AArch64::FMULXv2f64:
3203  case AArch64::FMULXv4f32:
3204  case AArch64::FMULv2f32:
3205  case AArch64::FMULv2f64:
3206  case AArch64::FMULv4f32:
3207  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3208  default:
3209  return false;
3210  }
3211 }
3212 
3213 /// Find instructions that can be turned into madd.
3214 static bool getMaddPatterns(MachineInstr &Root,
3215  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3216  unsigned Opc = Root.getOpcode();
3217  MachineBasicBlock &MBB = *Root.getParent();
3218  bool Found = false;
3219 
3220  if (!isCombineInstrCandidate(Opc))
3221  return false;
3222  if (isCombineInstrSettingFlag(Opc)) {
3223  int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3224  // When NZCV is live bail out.
3225  if (Cmp_NZCV == -1)
3226  return false;
3227  unsigned NewOpc = convertFlagSettingOpcode(Root);
3228  // When opcode can't change bail out.
3229  // CHECKME: do we miss any cases for opcode conversion?
3230  if (NewOpc == Opc)
3231  return false;
3232  Opc = NewOpc;
3233  }
3234 
3235  switch (Opc) {
3236  default:
3237  break;
3238  case AArch64::ADDWrr:
3239  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3240  "ADDWrr does not have register operands");
3241  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3242  AArch64::WZR)) {
3243  Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
3244  Found = true;
3245  }
3246  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3247  AArch64::WZR)) {
3248  Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
3249  Found = true;
3250  }
3251  break;
3252  case AArch64::ADDXrr:
3253  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3254  AArch64::XZR)) {
3255  Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
3256  Found = true;
3257  }
3258  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3259  AArch64::XZR)) {
3260  Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
3261  Found = true;
3262  }
3263  break;
3264  case AArch64::SUBWrr:
3265  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3266  AArch64::WZR)) {
3267  Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
3268  Found = true;
3269  }
3270  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3271  AArch64::WZR)) {
3272  Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
3273  Found = true;
3274  }
3275  break;
3276  case AArch64::SUBXrr:
3277  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3278  AArch64::XZR)) {
3279  Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
3280  Found = true;
3281  }
3282  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3283  AArch64::XZR)) {
3284  Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
3285  Found = true;
3286  }
3287  break;
3288  case AArch64::ADDWri:
3289  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3290  AArch64::WZR)) {
3291  Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
3292  Found = true;
3293  }
3294  break;
3295  case AArch64::ADDXri:
3296  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3297  AArch64::XZR)) {
3298  Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3299  Found = true;
3300  }
3301  break;
3302  case AArch64::SUBWri:
3303  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3304  AArch64::WZR)) {
3305  Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3306  Found = true;
3307  }
3308  break;
3309  case AArch64::SUBXri:
3310  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3311  AArch64::XZR)) {
3312  Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3313  Found = true;
3314  }
3315  break;
3316  }
3317  return Found;
3318 }
3319 /// Floating-Point Support
3320 
3321 /// Find instructions that can be turned into madd.
3322 static bool getFMAPatterns(MachineInstr &Root,
3323  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3324 
3325  if (!isCombineInstrCandidateFP(Root))
3326  return false;
3327 
3328  MachineBasicBlock &MBB = *Root.getParent();
3329  bool Found = false;
3330 
3331  switch (Root.getOpcode()) {
3332  default:
3333  assert(false && "Unsupported FP instruction in combiner\n");
3334  break;
3335  case AArch64::FADDSrr:
3336  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3337  "FADDWrr does not have register operands");
3338  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3339  Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3340  Found = true;
3341  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3342  AArch64::FMULv1i32_indexed)) {
3343  Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3344  Found = true;
3345  }
3346  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3347  Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3348  Found = true;
3349  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3350  AArch64::FMULv1i32_indexed)) {
3351  Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3352  Found = true;
3353  }
3354  break;
3355  case AArch64::FADDDrr:
3356  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3357  Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3358  Found = true;
3359  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3360  AArch64::FMULv1i64_indexed)) {
3361  Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3362  Found = true;
3363  }
3364  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3365  Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3366  Found = true;
3367  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3368  AArch64::FMULv1i64_indexed)) {
3369  Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3370  Found = true;
3371  }
3372  break;
3373  case AArch64::FADDv2f32:
3374  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3375  AArch64::FMULv2i32_indexed)) {
3376  Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3377  Found = true;
3378  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3379  AArch64::FMULv2f32)) {
3380  Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3381  Found = true;
3382  }
3383  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3384  AArch64::FMULv2i32_indexed)) {
3385  Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3386  Found = true;
3387  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3388  AArch64::FMULv2f32)) {
3389  Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3390  Found = true;
3391  }
3392  break;
3393  case AArch64::FADDv2f64:
3394  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3395  AArch64::FMULv2i64_indexed)) {
3396  Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3397  Found = true;
3398  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3399  AArch64::FMULv2f64)) {
3400  Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3401  Found = true;
3402  }
3403  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3404  AArch64::FMULv2i64_indexed)) {
3405  Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3406  Found = true;
3407  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3408  AArch64::FMULv2f64)) {
3409  Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3410  Found = true;
3411  }
3412  break;
3413  case AArch64::FADDv4f32:
3414  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3415  AArch64::FMULv4i32_indexed)) {
3416  Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3417  Found = true;
3418  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3419  AArch64::FMULv4f32)) {
3420  Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3421  Found = true;
3422  }
3423  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3424  AArch64::FMULv4i32_indexed)) {
3425  Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3426  Found = true;
3427  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3428  AArch64::FMULv4f32)) {
3429  Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3430  Found = true;
3431  }
3432  break;
3433 
3434  case AArch64::FSUBSrr:
3435  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3436  Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3437  Found = true;
3438  }
3439  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3440  Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3441  Found = true;
3442  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3443  AArch64::FMULv1i32_indexed)) {
3444  Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3445  Found = true;
3446  }
3447  break;
3448  case AArch64::FSUBDrr:
3449  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3450  Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3451  Found = true;
3452  }
3453  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3454  Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3455  Found = true;
3456  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3457  AArch64::FMULv1i64_indexed)) {
3458  Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3459  Found = true;
3460  }
3461  break;
3462  case AArch64::FSUBv2f32:
3463  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3464  AArch64::FMULv2i32_indexed)) {
3465  Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3466  Found = true;
3467  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3468  AArch64::FMULv2f32)) {
3469  Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3470  Found = true;
3471  }
3472  break;
3473  case AArch64::FSUBv2f64:
3474  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3475  AArch64::FMULv2i64_indexed)) {
3476  Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3477  Found = true;
3478  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3479  AArch64::FMULv2f64)) {
3480  Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3481  Found = true;
3482  }
3483  break;
3484  case AArch64::FSUBv4f32:
3485  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3486  AArch64::FMULv4i32_indexed)) {
3487  Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3488  Found = true;
3489  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3490  AArch64::FMULv4f32)) {
3491  Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3492  Found = true;
3493  }
3494  break;
3495  }
3496  return Found;
3497 }
3498 
3499 /// Return true when a code sequence can improve throughput. It
3500 /// should be called only for instructions in loops.
3501 /// \param Pattern - combiner pattern
3502 bool
3503 AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
3504  switch (Pattern) {
3505  default:
3506  break;
3507  case MachineCombinerPattern::FMULADDS_OP1:
3508  case MachineCombinerPattern::FMULADDS_OP2:
3509  case MachineCombinerPattern::FMULSUBS_OP1:
3510  case MachineCombinerPattern::FMULSUBS_OP2:
3511  case MachineCombinerPattern::FMULADDD_OP1:
3512  case MachineCombinerPattern::FMULADDD_OP2:
3513  case MachineCombinerPattern::FMULSUBD_OP1:
3514  case MachineCombinerPattern::FMULSUBD_OP2:
3515  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3516  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3517  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3518  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3519  case MachineCombinerPattern::FMLAv2f32_OP2:
3520  case MachineCombinerPattern::FMLAv2f32_OP1:
3521  case MachineCombinerPattern::FMLAv2f64_OP1:
3522  case MachineCombinerPattern::FMLAv2f64_OP2:
3523  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3524  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3525  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3526  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3527  case MachineCombinerPattern::FMLAv4f32_OP1:
3528  case MachineCombinerPattern::FMLAv4f32_OP2:
3529  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3530  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3531  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3532  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3533  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3534  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3535  case MachineCombinerPattern::FMLSv2f32_OP2:
3536  case MachineCombinerPattern::FMLSv2f64_OP2:
3537  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3538  case MachineCombinerPattern::FMLSv4f32_OP2:
3539  return true;
3540  } // end switch (Pattern)
3541  return false;
3542 }
3543 /// Return true when there is potentially a faster code sequence for an
3544 /// instruction chain ending in \p Root. All potential patterns are listed in
3545 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3546 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3547 
3548 bool AArch64InstrInfo::getMachineCombinerPatterns(
3549  MachineInstr &Root,
3550  SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3551  // Integer patterns
3552  if (getMaddPatterns(Root, Patterns))
3553  return true;
3554  // Floating point patterns
3555  if (getFMAPatterns(Root, Patterns))
3556  return true;
3557 
3558  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3559 }
3560 
3561 enum class FMAInstKind { Default, Indexed, Accumulator };
3562 /// genFusedMultiply - Generate fused multiply instructions.
3563 /// This function supports both integer and floating point instructions.
3564 /// A typical example:
3565 /// F|MUL I=A,B,0
3566 /// F|ADD R,I,C
3567 /// ==> F|MADD R,A,B,C
3568 /// \param Root is the F|ADD instruction
3569 /// \param [out] InsInstrs is a vector of machine instructions and will
3570 /// contain the generated madd instruction
3571 /// \param IdxMulOpd is index of operand in Root that is the result of
3572 /// the F|MUL. In the example above IdxMulOpd is 1.
3573 /// \param MaddOpc the opcode of the f|madd instruction
3574 static MachineInstr *
3575 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3576  const TargetInstrInfo *TII, MachineInstr &Root,
3577  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3578  unsigned MaddOpc, const TargetRegisterClass *RC,
3579  FMAInstKind kind = FMAInstKind::Default) {
3580  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3581 
3582  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3583  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3584  unsigned ResultReg = Root.getOperand(0).getReg();
3585  unsigned SrcReg0 = MUL->getOperand(1).getReg();
3586  bool Src0IsKill = MUL->getOperand(1).isKill();
3587  unsigned SrcReg1 = MUL->getOperand(2).getReg();
3588  bool Src1IsKill = MUL->getOperand(2).isKill();
3589  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3590  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3591 
3592  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3593  MRI.constrainRegClass(ResultReg, RC);
3594  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3595  MRI.constrainRegClass(SrcReg0, RC);
3596  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3597  MRI.constrainRegClass(SrcReg1, RC);
3598  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3599  MRI.constrainRegClass(SrcReg2, RC);
3600 
3601  MachineInstrBuilder MIB;
3602  if (kind == FMAInstKind::Default)
3603  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3604  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3605  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3606  .addReg(SrcReg2, getKillRegState(Src2IsKill));
3607  else if (kind == FMAInstKind::Indexed)
3608  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3609  .addReg(SrcReg2, getKillRegState(Src2IsKill))
3610  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3611  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3612  .addImm(MUL->getOperand(3).getImm());
3613  else if (kind == FMAInstKind::Accumulator)
3614  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3615  .addReg(SrcReg2, getKillRegState(Src2IsKill))
3616  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3617  .addReg(SrcReg1, getKillRegState(Src1IsKill));
3618  else
3619  assert(false && "Invalid FMA instruction kind \n");
3620  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
3621  InsInstrs.push_back(MIB);
3622  return MUL;
3623 }
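// Editorial sketch (machine-IR pseudo-notation, not from the original file):
// for FMAInstKind::Default the combiner rewrites
//
//   %vreg1 = MADDWrrr %a, %b, %wzr   ; canonical MUL (I = A * B)
//   %vreg2 = ADDWrr %vreg1, %c
//
// into the single fused instruction
//
//   %vreg2 = MADDWrrr %a, %b, %c     ; R = A * B + C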
3624 
3625 /// genMaddR - Generate madd instruction and combine mul and add using
3626 /// an extra virtual register
3627 /// Example - an ADD intermediate needs to be stored in a register:
3628 /// MUL I=A,B,0
3629 /// ADD R,I,Imm
3630 /// ==> ORR V, ZR, Imm
3631 /// ==> MADD R,A,B,V
3632 /// \param Root is the ADD instruction
3633 /// \param [out] InsInstrs is a vector of machine instructions and will
3634 /// contain the generated madd instruction
3635 /// \param IdxMulOpd is index of operand in Root that is the result of
3636 /// the MUL. In the example above IdxMulOpd is 1.
3637 /// \param MaddOpc the opcode of the madd instruction
3638 /// \param VR is a virtual register that holds the value of an ADD operand
3639 /// (V in the example above).
3640 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
3641  const TargetInstrInfo *TII, MachineInstr &Root,
3642  SmallVectorImpl<MachineInstr *> &InsInstrs,
3643  unsigned IdxMulOpd, unsigned MaddOpc,
3644  unsigned VR, const TargetRegisterClass *RC) {
3645  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3646 
3647  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3648  unsigned ResultReg = Root.getOperand(0).getReg();
3649  unsigned SrcReg0 = MUL->getOperand(1).getReg();
3650  bool Src0IsKill = MUL->getOperand(1).isKill();
3651  unsigned SrcReg1 = MUL->getOperand(2).getReg();
3652  bool Src1IsKill = MUL->getOperand(2).isKill();
3653 
3654  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3655  MRI.constrainRegClass(ResultReg, RC);
3656  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3657  MRI.constrainRegClass(SrcReg0, RC);
3658  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3659  MRI.constrainRegClass(SrcReg1, RC);
3660  if (TargetRegisterInfo::isVirtualRegister(VR))
3661  MRI.constrainRegClass(VR, RC);
3662 
3663  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
3664  ResultReg)
3665  .addReg(SrcReg0, getKillRegState(Src0IsKill))
3666  .addReg(SrcReg1, getKillRegState(Src1IsKill))
3667  .addReg(VR);
3668  // Insert the MADD
3669  InsInstrs.push_back(MIB);
3670  return MUL;
3671 }
3672 
3673 /// When getMachineCombinerPatterns() finds potential patterns,
3674 /// this function generates the instructions that could replace the
3675 /// original code sequence.
3676 void AArch64InstrInfo::genAlternativeCodeSequence(
3677  MachineInstr &Root, MachineCombinerPattern Pattern,
3678  SmallVectorImpl<MachineInstr *> &InsInstrs,
3679  SmallVectorImpl<MachineInstr *> &DelInstrs,
3680  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
3681  MachineBasicBlock &MBB = *Root.getParent();
3682  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3683  MachineFunction &MF = *MBB.getParent();
3684  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
3685 
3686  MachineInstr *MUL = nullptr;
3687  const TargetRegisterClass *RC;
3688  unsigned Opc;
3689  switch (Pattern) {
3690  default:
3691  // Reassociate instructions.
3692  TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
3693  DelInstrs, InstrIdxForVirtReg);
3694  return;
3695  case MachineCombinerPattern::MULADDW_OP1:
3696  case MachineCombinerPattern::MULADDX_OP1:
3697  // MUL I=A,B,0
3698  // ADD R,I,C
3699  // ==> MADD R,A,B,C
3700  // --- Create(MADD);
3701  if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
3702  Opc = AArch64::MADDWrrr;
3703  RC = &AArch64::GPR32RegClass;
3704  } else {
3705  Opc = AArch64::MADDXrrr;
3706  RC = &AArch64::GPR64RegClass;
3707  }
3708  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3709  break;
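// For example (hypothetical registers), the OP1 form above rewrites
//   mul  w9, w1, w2
//   add  w0, w9, w3
// into the single instruction
//   madd w0, w1, w2, w3        // w0 = w1 * w2 + w3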
3710  case MachineCombinerPattern::MULADDW_OP2:
3711  case MachineCombinerPattern::MULADDX_OP2:
3712  // MUL I=A,B,0
3713  // ADD R,C,I
3714  // ==> MADD R,A,B,C
3715  // --- Create(MADD);
3716  if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
3717  Opc = AArch64::MADDWrrr;
3718  RC = &AArch64::GPR32RegClass;
3719  } else {
3720  Opc = AArch64::MADDXrrr;
3721  RC = &AArch64::GPR64RegClass;
3722  }
3723  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3724  break;
3725  case MachineCombinerPattern::MULADDWI_OP1:
3726  case MachineCombinerPattern::MULADDXI_OP1: {
3727  // MUL I=A,B,0
3728  // ADD R,I,Imm
3729  // ==> ORR V, ZR, Imm
3730  // ==> MADD R,A,B,V
3731  // --- Create(MADD);
3732  const TargetRegisterClass *OrrRC;
3733  unsigned BitSize, OrrOpc, ZeroReg;
3734  if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
3735  OrrOpc = AArch64::ORRWri;
3736  OrrRC = &AArch64::GPR32spRegClass;
3737  BitSize = 32;
3738  ZeroReg = AArch64::WZR;
3739  Opc = AArch64::MADDWrrr;
3740  RC = &AArch64::GPR32RegClass;
3741  } else {
3742  OrrOpc = AArch64::ORRXri;
3743  OrrRC = &AArch64::GPR64spRegClass;
3744  BitSize = 64;
3745  ZeroReg = AArch64::XZR;
3746  Opc = AArch64::MADDXrrr;
3747  RC = &AArch64::GPR64RegClass;
3748  }
3749  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
3750  uint64_t Imm = Root.getOperand(2).getImm();
3751 
3752  if (Root.getOperand(3).isImm()) {
3753  unsigned Val = Root.getOperand(3).getImm();
3754  Imm = Imm << Val;
3755  }
3756  uint64_t UImm = SignExtend64(Imm, BitSize);
3757  uint64_t Encoding;
3758  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
3759  MachineInstrBuilder MIB1 =
3760  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
3761  .addReg(ZeroReg)
3762  .addImm(Encoding);
3763  InsInstrs.push_back(MIB1);
3764  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3765  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3766  }
3767  break;
3768  }
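// Worked example (hypothetical registers): for
//   mul  w9, w1, w2
//   add  w0, w9, #1, lsl #12   // operand 2 is 1, operand 3 is the shift 12
// Imm becomes 1 << 12 = 0x1000, which is encodable as a logical immediate,
// so the combiner emits
//   orr  wV, wzr, #0x1000
//   madd w0, w1, w2, wV
// When the shifted immediate is not encodable, no replacement is generated.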
3769  case MachineCombinerPattern::MULSUBW_OP1:
3770  case MachineCombinerPattern::MULSUBX_OP1: {
3771  // MUL I=A,B,0
3772  // SUB R,I, C
3773  // ==> SUB V, 0, C
3774  // ==> MADD R,A,B,V // = -C + A*B
3775  // --- Create(MADD);
3776  const TargetRegisterClass *SubRC;
3777  unsigned SubOpc, ZeroReg;
3778  if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
3779  SubOpc = AArch64::SUBWrr;
3780  SubRC = &AArch64::GPR32spRegClass;
3781  ZeroReg = AArch64::WZR;
3782  Opc = AArch64::MADDWrrr;
3783  RC = &AArch64::GPR32RegClass;
3784  } else {
3785  SubOpc = AArch64::SUBXrr;
3786  SubRC = &AArch64::GPR64spRegClass;
3787  ZeroReg = AArch64::XZR;
3788  Opc = AArch64::MADDXrrr;
3789  RC = &AArch64::GPR64RegClass;
3790  }
3791  unsigned NewVR = MRI.createVirtualRegister(SubRC);
3792  // SUB NewVR, 0, C
3793  MachineInstrBuilder MIB1 =
3794  BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
3795  .addReg(ZeroReg)
3796  .addOperand(Root.getOperand(2));
3797  InsInstrs.push_back(MIB1);
3798  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3799  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3800  break;
3801  }
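// For example (hypothetical registers),
//   mul  w9, w1, w2
//   sub  w0, w9, w3
// becomes
//   sub  wV, wzr, w3
//   madd w0, w1, w2, wV        // w0 = (-w3) + w1 * w2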
3802  case MachineCombinerPattern::MULSUBW_OP2:
3803  case MachineCombinerPattern::MULSUBX_OP2:
3804  // MUL I=A,B,0
3805  // SUB R,C,I
3806  // ==> MSUB R,A,B,C (computes C - A*B)
3807  // --- Create(MSUB);
3808  if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
3809  Opc = AArch64::MSUBWrrr;
3810  RC = &AArch64::GPR32RegClass;
3811  } else {
3812  Opc = AArch64::MSUBXrrr;
3813  RC = &AArch64::GPR64RegClass;
3814  }
3815  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3816  break;
3817  case MachineCombinerPattern::MULSUBWI_OP1:
3818  case MachineCombinerPattern::MULSUBXI_OP1: {
3819  // MUL I=A,B,0
3820  // SUB R,I, Imm
3821  // ==> ORR V, ZR, -Imm
3822  // ==> MADD R,A,B,V // = -Imm + A*B
3823  // --- Create(MADD);
3824  const TargetRegisterClass *OrrRC;
3825  unsigned BitSize, OrrOpc, ZeroReg;
3826  if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
3827  OrrOpc = AArch64::ORRWri;
3828  OrrRC = &AArch64::GPR32spRegClass;
3829  BitSize = 32;
3830  ZeroReg = AArch64::WZR;
3831  Opc = AArch64::MADDWrrr;
3832  RC = &AArch64::GPR32RegClass;
3833  } else {
3834  OrrOpc = AArch64::ORRXri;
3835  OrrRC = &AArch64::GPR64spRegClass;
3836  BitSize = 64;
3837  ZeroReg = AArch64::XZR;
3838  Opc = AArch64::MADDXrrr;
3839  RC = &AArch64::GPR64RegClass;
3840  }
3841  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
3842  uint64_t Imm = Root.getOperand(2).getImm();
3843  if (Root.getOperand(3).isImm()) {
3844  unsigned Val = Root.getOperand(3).getImm();
3845  Imm = Imm << Val;
3846  }
3847  uint64_t UImm = SignExtend64(-Imm, BitSize);
3848  uint64_t Encoding;
3849  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
3850  MachineInstrBuilder MIB1 =
3851  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
3852  .addReg(ZeroReg)
3853  .addImm(Encoding);
3854  InsInstrs.push_back(MIB1);
3855  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3856  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3857  }
3858  break;
3859  }
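// For example (hypothetical registers), for
//   mul  w9, w1, w2
//   sub  w0, w9, #8
// the negated immediate -8 (0xfffffff8 as a 32-bit value) is a valid
// logical immediate, so the combiner emits
//   orr  wV, wzr, #0xfffffff8
//   madd w0, w1, w2, wV        // w0 = w1 * w2 - 8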
3860  // Floating Point Support
3861  case MachineCombinerPattern::FMULADDS_OP1:
3862  case MachineCombinerPattern::FMULADDD_OP1:
3863  // FMUL I=A,B,0
3864  // FADD R,I,C
3865  // ==> FMADD R,A,B,C
3866  // --- Create(FMADD);
3867  if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
3868  Opc = AArch64::FMADDSrrr;
3869  RC = &AArch64::FPR32RegClass;
3870  } else {
3871  Opc = AArch64::FMADDDrrr;
3872  RC = &AArch64::FPR64RegClass;
3873  }
3874  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3875  break;
3876  case MachineCombinerPattern::FMULADDS_OP2:
3877  case MachineCombinerPattern::FMULADDD_OP2:
3878  // FMUL I=A,B,0
3879  // FADD R,C,I
3880  // ==> FMADD R,A,B,C
3881  // --- Create(FMADD);
3882  if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
3883  Opc = AArch64::FMADDSrrr;
3884  RC = &AArch64::FPR32RegClass;
3885  } else {
3886  Opc = AArch64::FMADDDrrr;
3887  RC = &AArch64::FPR64RegClass;
3888  }
3889  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3890  break;
3891 
3892  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3893  Opc = AArch64::FMLAv1i32_indexed;
3894  RC = &AArch64::FPR32RegClass;
3895  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3896  FMAInstKind::Indexed);
3897  break;
3898  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3899  Opc = AArch64::FMLAv1i32_indexed;
3900  RC = &AArch64::FPR32RegClass;
3901  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3902  FMAInstKind::Indexed);
3903  break;
3904 
3905  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3906  Opc = AArch64::FMLAv1i64_indexed;
3907  RC = &AArch64::FPR64RegClass;
3908  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3909  FMAInstKind::Indexed);
3910  break;
3911  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3912  Opc = AArch64::FMLAv1i64_indexed;
3913  RC = &AArch64::FPR64RegClass;
3914  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3915  FMAInstKind::Indexed);
3916  break;
3917 
3918  case MachineCombinerPattern::FMLAv2f32_OP1:
3919  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3920  RC = &AArch64::FPR64RegClass;
3921  if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
3922  Opc = AArch64::FMLAv2i32_indexed;
3923  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3924  FMAInstKind::Indexed);
3925  } else {
3926  Opc = AArch64::FMLAv2f32;
3927  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3928  FMAInstKind::Accumulator);
3929  }
3930  break;
3931  case MachineCombinerPattern::FMLAv2f32_OP2:
3932  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3933  RC = &AArch64::FPR64RegClass;
3934  if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
3935  Opc = AArch64::FMLAv2i32_indexed;
3936  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3937  FMAInstKind::Indexed);
3938  } else {
3939  Opc = AArch64::FMLAv2f32;
3940  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3941  FMAInstKind::Accumulator);
3942  }
3943  break;
3944 
3945  case MachineCombinerPattern::FMLAv2f64_OP1:
3946  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3947  RC = &AArch64::FPR128RegClass;
3948  if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
3949  Opc = AArch64::FMLAv2i64_indexed;
3950  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3951  FMAInstKind::Indexed);
3952  } else {
3953  Opc = AArch64::FMLAv2f64;
3954  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3955  FMAInstKind::Accumulator);
3956  }
3957  break;
3958  case MachineCombinerPattern::FMLAv2f64_OP2:
3959  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3960  RC = &AArch64::FPR128RegClass;
3961  if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
3962  Opc = AArch64::FMLAv2i64_indexed;
3963  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3964  FMAInstKind::Indexed);
3965  } else {
3966  Opc = AArch64::FMLAv2f64;
3967  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3968  FMAInstKind::Accumulator);
3969  }
3970  break;
3971 
3972  case MachineCombinerPattern::FMLAv4f32_OP1:
3973  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3974  RC = &AArch64::FPR128RegClass;
3975  if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
3976  Opc = AArch64::FMLAv4i32_indexed;
3977  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3978  FMAInstKind::Indexed);
3979  } else {
3980  Opc = AArch64::FMLAv4f32;
3981  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3982  FMAInstKind::Accumulator);
3983  }
3984  break;
3985 
3986  case MachineCombinerPattern::FMLAv4f32_OP2:
3987  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3988  RC = &AArch64::FPR128RegClass;
3989  if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
3990  Opc = AArch64::FMLAv4i32_indexed;
3991  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3992  FMAInstKind::Indexed);
3993  } else {
3994  Opc = AArch64::FMLAv4f32;
3995  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3996  FMAInstKind::Accumulator);
3997  }
3998  break;
3999 
4000  case MachineCombinerPattern::FMULSUBS_OP1:
4001  case MachineCombinerPattern::FMULSUBD_OP1: {
4002  // FMUL I=A,B,0
4003  // FSUB R,I,C
4004  // ==> FNMSUB R,A,B,C // = -C + A*B
4005  // --- Create(FNMSUB);
4006  if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4007  Opc = AArch64::FNMSUBSrrr;
4008  RC = &AArch64::FPR32RegClass;
4009  } else {
4010  Opc = AArch64::FNMSUBDrrr;
4011  RC = &AArch64::FPR64RegClass;
4012  }
4013  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4014  break;
4015  }
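// Note that AArch64's FNMSUB computes Rd = Rn * Rm - Ra, so (hypothetical
// registers)
//   fmul s9, s1, s2
//   fsub s0, s9, s3
// becomes
//   fnmsub s0, s1, s2, s3      // s0 = s1 * s2 - s3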
4016  case MachineCombinerPattern::FMULSUBS_OP2:
4017  case MachineCombinerPattern::FMULSUBD_OP2: {
4018  // FMUL I=A,B,0
4019  // FSUB R,C,I
4020  // ==> FMSUB R,A,B,C (computes C - A*B)
4021  // --- Create(FMSUB);
4022  if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4023  Opc = AArch64::FMSUBSrrr;
4024  RC = &AArch64::FPR32RegClass;
4025  } else {
4026  Opc = AArch64::FMSUBDrrr;
4027  RC = &AArch64::FPR64RegClass;
4028  }
4029  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4030  break;
4031 
4032  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4033  Opc = AArch64::FMLSv1i32_indexed;
4034  RC = &AArch64::FPR32RegClass;
4035  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4036  FMAInstKind::Indexed);
4037  break;
4038 
4039  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4040  Opc = AArch64::FMLSv1i64_indexed;
4041  RC = &AArch64::FPR64RegClass;
4042  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4043  FMAInstKind::Indexed);
4044  break;
4045 
4046  case MachineCombinerPattern::FMLSv2f32_OP2:
4047  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4048  RC = &AArch64::FPR64RegClass;
4049  if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4050  Opc = AArch64::FMLSv2i32_indexed;
4051  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4052  FMAInstKind::Indexed);
4053  } else {
4054  Opc = AArch64::FMLSv2f32;
4055  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4056  FMAInstKind::Accumulator);
4057  }
4058  break;
4059 
4060  case MachineCombinerPattern::FMLSv2f64_OP2:
4061  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4062  RC = &AArch64::FPR128RegClass;
4063  if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4064  Opc = AArch64::FMLSv2i64_indexed;
4065  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4066  FMAInstKind::Indexed);
4067  } else {
4068  Opc = AArch64::FMLSv2f64;
4069  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4070  FMAInstKind::Accumulator);
4071  }
4072  break;
4073 
4074  case MachineCombinerPattern::FMLSv4f32_OP2:
4075  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4076  RC = &AArch64::FPR128RegClass;
4077  if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4078  Opc = AArch64::FMLSv4i32_indexed;
4079  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4080  FMAInstKind::Indexed);
4081  } else {
4082  Opc = AArch64::FMLSv4f32;
4083  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4084  FMAInstKind::Accumulator);
4085  }
4086  break;
4087  }
4088  } // end switch (Pattern)
4089  // Record MUL (when a replacement was generated) and ADD/SUB for deletion.
4090  if (MUL) DelInstrs.push_back(MUL);
4091  DelInstrs.push_back(&Root);
4092 }
4093 
4094 /// \brief Replace a csinc-branch sequence by a simple conditional branch
4095 ///
4096 /// Examples:
4097 /// 1.
4098 /// csinc w9, wzr, wzr, <condition code>
4099 /// tbnz w9, #0, 0x44
4100 /// to
4101 /// b.<inverted condition code>
4102 ///
4103 /// 2.
4104 /// csinc w9, wzr, wzr, <condition code>
4105 /// tbz w9, #0, 0x44
4106 /// to
4107 /// b.<condition code>
4108 ///
4109 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4110 /// compare's constant operand is a power of 2.
4111 ///
4112 /// Examples:
4113 /// and w8, w8, #0x400
4114 /// cbnz w8, L1
4115 /// to
4116 /// tbnz w8, #10, L1
4117 ///
4118 /// \param MI Conditional Branch
4119 /// \return True when the simple conditional branch is generated
4120 ///
4121 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4122  bool IsNegativeBranch = false;
4123  bool IsTestAndBranch = false;
4124  unsigned TargetBBInMI = 0;
4125  switch (MI.getOpcode()) {
4126  default:
4127  llvm_unreachable("Unknown branch instruction?");
4128  case AArch64::Bcc:
4129  return false;
4130  case AArch64::CBZW:
4131  case AArch64::CBZX:
4132  TargetBBInMI = 1;
4133  break;
4134  case AArch64::CBNZW:
4135  case AArch64::CBNZX:
4136  TargetBBInMI = 1;
4137  IsNegativeBranch = true;
4138  break;
4139  case AArch64::TBZW:
4140  case AArch64::TBZX:
4141  TargetBBInMI = 2;
4142  IsTestAndBranch = true;
4143  break;
4144  case AArch64::TBNZW:
4145  case AArch64::TBNZX:
4146  TargetBBInMI = 2;
4147  IsNegativeBranch = true;
4148  IsTestAndBranch = true;
4149  break;
4150  }
4151  // So we increment a zero register and test for bits other
4152  // than bit 0? Conservatively bail out in case the verifier
4153  // missed this case.
4154  if (IsTestAndBranch && MI.getOperand(1).getImm())
4155  return false;
4156 
4157  // Find Definition.
4158  assert(MI.getParent() && "Incomplete machine instruction\n");
4159  MachineBasicBlock *MBB = MI.getParent();
4160  MachineFunction *MF = MBB->getParent();
4161  MachineRegisterInfo *MRI = &MF->getRegInfo();
4162  unsigned VReg = MI.getOperand(0).getReg();
4163  if (!TargetRegisterInfo::isVirtualRegister(VReg))
4164  return false;
4165 
4166  MachineInstr *DefMI = MRI->getVRegDef(VReg);
4167 
4168  // Look through COPY instructions to find definition.
4169  while (DefMI->isCopy()) {
4170  unsigned CopyVReg = DefMI->getOperand(1).getReg();
4171  if (!MRI->hasOneNonDBGUse(CopyVReg))
4172  return false;
4173  if (!MRI->hasOneDef(CopyVReg))
4174  return false;
4175  DefMI = MRI->getVRegDef(CopyVReg);
4176  }
4177 
4178  switch (DefMI->getOpcode()) {
4179  default:
4180  return false;
4181  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4182  case AArch64::ANDWri:
4183  case AArch64::ANDXri: {
4184  if (IsTestAndBranch)
4185  return false;
4186  if (DefMI->getParent() != MBB)
4187  return false;
4188  if (!MRI->hasOneNonDBGUse(VReg))
4189  return false;
4190 
4191  bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4192  uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4193  DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4194  if (!isPowerOf2_64(Mask))
4195  return false;
4196 
4197  MachineOperand &MO = DefMI->getOperand(1);
4198  unsigned NewReg = MO.getReg();
4199  if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4200  return false;
4201 
4202  assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4203 
4204  MachineBasicBlock &RefToMBB = *MBB;
4205  MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4206  DebugLoc DL = MI.getDebugLoc();
4207  unsigned Imm = Log2_64(Mask);
4208  unsigned Opc = (Imm < 32)
4209  ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4210  : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4211  MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4212  .addReg(NewReg)
4213  .addImm(Imm)
4214  .addMBB(TBB);
4215  // The AND's source register lives on into the new TBZ/TBNZ, so clear
4215  // its kill flag.
4216  MO.setIsKill(false);
4217 
4218  // For an immediate smaller than 32, we need to use the 32-bit
4219  // variant (W) in all cases, because the 64-bit variant cannot
4220  // encode such bit positions.
4221  // Therefore, if the input register is 64-bit, we need to take the
4222  // 32-bit sub-part.
4223  if (!Is32Bit && Imm < 32)
4224  NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4225  MI.eraseFromParent();
4226  return true;
4227  }
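// Worked example (hypothetical registers): for
//   and  x8, x9, #0x100
//   cbnz x8, <target>
// Mask = 0x100 is a power of two, so Imm = Log2_64(0x100) = 8. Since
// Imm < 32, the W variant is selected and the 64-bit source is referenced
// through its sub_32 sub-register:
//   tbnz w9, #8, <target>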
4228  // Look for CSINC
4229  case AArch64::CSINCWr:
4230  case AArch64::CSINCXr: {
4231  if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4232  DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4233  !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4234  DefMI->getOperand(2).getReg() == AArch64::XZR))
4235  return false;
4236 
4237  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4238  return false;
4239 
4240  AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4241  // Convert only when the condition code is not modified between
4242  // the CSINC and the branch. The CC may be used by other
4243  // instructions in between.
4244  if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4245  return false;
4246  MachineBasicBlock &RefToMBB = *MBB;
4247  MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4248  DebugLoc DL = MI.getDebugLoc();
4249  if (IsNegativeBranch)
4250  CC = AArch64CC::getInvertedCondCode(CC);
4251  BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4252  MI.eraseFromParent();
4253  return true;
4254  }
4255  }
4256 }
4257 
4258 std::pair<unsigned, unsigned>
4259 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4260  const unsigned Mask = AArch64II::MO_FRAGMENT;
4261  return std::make_pair(TF & Mask, TF & ~Mask);
4262 }
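// For example, decomposing a flag word of (MO_PAGEOFF | MO_NC) yields the
// direct fragment MO_PAGEOFF in the first element and the remaining bitmask
// flag MO_NC in the second, since MO_FRAGMENT covers only the
// PAGE/PAGEOFF/G0-G3/HI12 fragment kind.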
4263 
4264 ArrayRef<std::pair<unsigned, const char *>>
4265 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4266  using namespace AArch64II;
4267 
4268  static const std::pair<unsigned, const char *> TargetFlags[] = {
4269  {MO_PAGE, "aarch64-page"},
4270  {MO_PAGEOFF, "aarch64-pageoff"},
4271  {MO_G3, "aarch64-g3"},
4272  {MO_G2, "aarch64-g2"},
4273  {MO_G1, "aarch64-g1"},
4274  {MO_G0, "aarch64-g0"},
4275  {MO_HI12, "aarch64-hi12"}};
4276  return makeArrayRef(TargetFlags);
4277 }
4278 
4279 ArrayRef<std::pair<unsigned, const char *>>
4280 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4281  using namespace AArch64II;
4282 
4283  static const std::pair<unsigned, const char *> TargetFlags[] = {
4284  {MO_GOT, "aarch64-got"},
4285  {MO_NC, "aarch64-nc"},
4286  {MO_TLS, "aarch64-tls"}};
4287  return makeArrayRef(TargetFlags);
4288 }
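// These are the names the MIR serializer uses for AArch64 operand target
// flags; a global reference materialized via ADRP/ADD would, for instance,
// carry flags printed along the lines of target-flags(aarch64-page) and
// target-flags(aarch64-pageoff, aarch64-nc) (schematic MIR syntax).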