AArch64InstrInfo.cpp
1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineCombinerPattern.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineMemOperand.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/PseudoSourceValue.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/TargetRegistry.h"
26 
27 using namespace llvm;
28 
29 #define GET_INSTRINFO_CTOR_DTOR
30 #include "AArch64GenInstrInfo.inc"
31 
32 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
33  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
34  RI(STI.getTargetTriple()), Subtarget(STI) {}
35 
36 /// GetInstSize - Return the number of bytes of code the specified
37 /// instruction may be. This returns the maximum number of bytes.
38 unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
39  const MachineBasicBlock &MBB = *MI->getParent();
40  const MachineFunction *MF = MBB.getParent();
41  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
42 
43  if (MI->getOpcode() == AArch64::INLINEASM)
44  return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
45 
46  const MCInstrDesc &Desc = MI->getDesc();
47  switch (Desc.getOpcode()) {
48  default:
49  // Anything not explicitly designated otherwise is a normal 4-byte insn.
50  return 4;
51  case TargetOpcode::DBG_VALUE:
52  case TargetOpcode::EH_LABEL:
53  case TargetOpcode::IMPLICIT_DEF:
54  case TargetOpcode::KILL:
55  return 0;
56  }
57 
58  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
59 }
60 
61 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
62                             SmallVectorImpl<MachineOperand> &Cond) {
63  // Block ends with fall-through condbranch.
64  switch (LastInst->getOpcode()) {
65  default:
66  llvm_unreachable("Unknown branch instruction?");
67  case AArch64::Bcc:
68  Target = LastInst->getOperand(1).getMBB();
69  Cond.push_back(LastInst->getOperand(0));
70  break;
71  case AArch64::CBZW:
72  case AArch64::CBZX:
73  case AArch64::CBNZW:
74  case AArch64::CBNZX:
75  Target = LastInst->getOperand(1).getMBB();
76  Cond.push_back(MachineOperand::CreateImm(-1));
77  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
78  Cond.push_back(LastInst->getOperand(0));
79  break;
80  case AArch64::TBZW:
81  case AArch64::TBZX:
82  case AArch64::TBNZW:
83  case AArch64::TBNZX:
84  Target = LastInst->getOperand(2).getMBB();
85  Cond.push_back(MachineOperand::CreateImm(-1));
86  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
87  Cond.push_back(LastInst->getOperand(0));
88  Cond.push_back(LastInst->getOperand(1));
89  }
90 }
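
// Illustrative note (mine, not in the original file): the three branch shapes
// above funnel into one Cond encoding, where -1 marks a folded
// compare-and-branch:
//
//   b.eq L          ->  Cond = { EQ }
//   cbz  w0, L      ->  Cond = { -1, CBZW, w0 }
//   tbz  w0, #3, L  ->  Cond = { -1, TBZW, w0, 3 }
//
// ReverseBranchCondition and instantiateCondBranch below dispatch on
// Cond[0].getImm() == -1.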
91 
92 // Branch analysis.
93 bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
94  MachineBasicBlock *&TBB,
95  MachineBasicBlock *&FBB,
96  SmallVectorImpl<MachineOperand> &Cond,
97  bool AllowModify) const {
98  // If the block has no terminators, it just falls into the block after it.
99  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
100  if (I == MBB.end())
101  return false;
102 
103  if (!isUnpredicatedTerminator(I))
104  return false;
105 
106  // Get the last instruction in the block.
107  MachineInstr *LastInst = I;
108 
109  // If there is only one terminator instruction, process it.
110  unsigned LastOpc = LastInst->getOpcode();
111  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
112  if (isUncondBranchOpcode(LastOpc)) {
113  TBB = LastInst->getOperand(0).getMBB();
114  return false;
115  }
116  if (isCondBranchOpcode(LastOpc)) {
117  // Block ends with fall-through condbranch.
118  parseCondBranch(LastInst, TBB, Cond);
119  return false;
120  }
121  return true; // Can't handle indirect branch.
122  }
123 
124  // Get the instruction before it if it is a terminator.
125  MachineInstr *SecondLastInst = I;
126  unsigned SecondLastOpc = SecondLastInst->getOpcode();
127 
128  // If AllowModify is true and the block ends with two or more unconditional
129  // branches, delete all but the first unconditional branch.
130  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
131  while (isUncondBranchOpcode(SecondLastOpc)) {
132  LastInst->eraseFromParent();
133  LastInst = SecondLastInst;
134  LastOpc = LastInst->getOpcode();
135  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
136  // Return now; the only terminator is an unconditional branch.
137  TBB = LastInst->getOperand(0).getMBB();
138  return false;
139  } else {
140  SecondLastInst = I;
141  SecondLastOpc = SecondLastInst->getOpcode();
142  }
143  }
144  }
145 
146  // If there are three terminators, we don't know what sort of block this is.
147  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
148  return true;
149 
150  // If the block ends with a B and a Bcc, handle it.
151  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
152  parseCondBranch(SecondLastInst, TBB, Cond);
153  FBB = LastInst->getOperand(0).getMBB();
154  return false;
155  }
156 
157  // If the block ends with two unconditional branches, handle it. The second
158  // one is not executed, so remove it.
159  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
160  TBB = SecondLastInst->getOperand(0).getMBB();
161  I = LastInst;
162  if (AllowModify)
163  I->eraseFromParent();
164  return false;
165  }
166 
167  // ...likewise if it ends with an indirect branch followed by an unconditional
168  // branch.
169  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
170  I = LastInst;
171  if (AllowModify)
172  I->eraseFromParent();
173  return true;
174  }
175 
176  // Otherwise, can't handle this.
177  return true;
178 }
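
// Worked example (mine, not from the source): a block ending in
//     b.lt LBB0_2
//     b    LBB0_3
// hits the "B and a Bcc" case above and yields TBB = LBB0_2, FBB = LBB0_3,
// Cond = { LT }; the false return value means the branch was analyzed.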
179 
180 bool AArch64InstrInfo::ReverseBranchCondition(
181  SmallVectorImpl<MachineOperand> &Cond) const {
182  if (Cond[0].getImm() != -1) {
183  // Regular Bcc
184  AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
185  Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
186  } else {
187  // Folded compare-and-branch
188  switch (Cond[1].getImm()) {
189  default:
190  llvm_unreachable("Unknown conditional branch!");
191  case AArch64::CBZW:
192  Cond[1].setImm(AArch64::CBNZW);
193  break;
194  case AArch64::CBNZW:
195  Cond[1].setImm(AArch64::CBZW);
196  break;
197  case AArch64::CBZX:
198  Cond[1].setImm(AArch64::CBNZX);
199  break;
200  case AArch64::CBNZX:
201  Cond[1].setImm(AArch64::CBZX);
202  break;
203  case AArch64::TBZW:
204  Cond[1].setImm(AArch64::TBNZW);
205  break;
206  case AArch64::TBNZW:
207  Cond[1].setImm(AArch64::TBZW);
208  break;
209  case AArch64::TBZX:
210  Cond[1].setImm(AArch64::TBNZX);
211  break;
212  case AArch64::TBNZX:
213  Cond[1].setImm(AArch64::TBZX);
214  break;
215  }
216  }
217 
218  return false;
219 }
220 
221 unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
222   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
223  if (I == MBB.end())
224  return 0;
225 
226  if (!isUncondBranchOpcode(I->getOpcode()) &&
227  !isCondBranchOpcode(I->getOpcode()))
228  return 0;
229 
230  // Remove the branch.
231  I->eraseFromParent();
232 
233  I = MBB.end();
234 
235  if (I == MBB.begin())
236  return 1;
237  --I;
238  if (!isCondBranchOpcode(I->getOpcode()))
239  return 1;
240 
241  // Remove the branch.
242  I->eraseFromParent();
243  return 2;
244 }
245 
246 void AArch64InstrInfo::instantiateCondBranch(
247     MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
248  ArrayRef<MachineOperand> Cond) const {
249  if (Cond[0].getImm() != -1) {
250  // Regular Bcc
251  BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
252  } else {
253  // Folded compare-and-branch
254  // Note that we use addOperand instead of addReg to keep the flags.
255  const MachineInstrBuilder MIB =
256  BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
257  if (Cond.size() > 3)
258  MIB.addImm(Cond[3].getImm());
259  MIB.addMBB(TBB);
260  }
261 }
262 
263 unsigned AArch64InstrInfo::InsertBranch(
264     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
265  ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
266  // Shouldn't be a fall through.
267  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
268 
269  if (!FBB) {
270  if (Cond.empty()) // Unconditional branch?
271  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
272  else
273  instantiateCondBranch(MBB, DL, TBB, Cond);
274  return 1;
275  }
276 
277  // Two-way conditional branch.
278  instantiateCondBranch(MBB, DL, TBB, Cond);
279  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
280  return 2;
281 }
282 
283 // Find the original register that VReg is copied from.
284 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
285   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
286  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
287  if (!DefMI->isFullCopy())
288  return VReg;
289  VReg = DefMI->getOperand(1).getReg();
290  }
291  return VReg;
292 }
293 
294 // Determine if VReg is defined by an instruction that can be folded into a
295 // csel instruction. If so, return the folded opcode, and the replacement
296 // register.
297 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
298  unsigned *NewVReg = nullptr) {
299  VReg = removeCopies(MRI, VReg);
300   if (!TargetRegisterInfo::isVirtualRegister(VReg))
301  return 0;
302 
303  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
304  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
305  unsigned Opc = 0;
306  unsigned SrcOpNum = 0;
307  switch (DefMI->getOpcode()) {
308  case AArch64::ADDSXri:
309  case AArch64::ADDSWri:
310  // if NZCV is used, do not fold.
311  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
312  return 0;
313  // fall-through to ADDXri and ADDWri.
314  case AArch64::ADDXri:
315  case AArch64::ADDWri:
316  // add x, 1 -> csinc.
317  if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
318  DefMI->getOperand(3).getImm() != 0)
319  return 0;
320  SrcOpNum = 1;
321  Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
322  break;
323 
324  case AArch64::ORNXrr:
325  case AArch64::ORNWrr: {
326  // not x -> csinv, represented as orn dst, xzr, src.
327  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
328  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
329  return 0;
330  SrcOpNum = 2;
331  Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
332  break;
333  }
334 
335  case AArch64::SUBSXrr:
336  case AArch64::SUBSWrr:
337  // if NZCV is used, do not fold.
338  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
339  return 0;
340  // fall-through to SUBXrr and SUBWrr.
341  case AArch64::SUBXrr:
342  case AArch64::SUBWrr: {
343  // neg x -> csneg, represented as sub dst, xzr, src.
344  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
345  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
346  return 0;
347  SrcOpNum = 2;
348  Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
349  break;
350  }
351  default:
352  return 0;
353  }
354  assert(Opc && SrcOpNum && "Missing parameters");
355 
356  if (NewVReg)
357  *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
358  return Opc;
359 }
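
// Illustrative example (mine): for a select such as "x = cond ? a + 1 : b",
// the ADDWri defining "a + 1" matches the first case above, so insertSelect
// below can invert the condition and emit "csinc x, b, a, !cond" instead of
// a separate add feeding a csel; orn-from-zr folds to csinv and sub-from-zr
// folds to csneg the same way.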
360 
361 bool AArch64InstrInfo::canInsertSelect(
362     const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
363  unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
364  int &FalseCycles) const {
365  // Check register classes.
366  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
367  const TargetRegisterClass *RC =
368  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
369  if (!RC)
370  return false;
371 
372  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
373  unsigned ExtraCondLat = Cond.size() != 1;
374 
375  // GPRs are handled by csel.
376  // FIXME: Fold in x+1, -x, and ~x when applicable.
377  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
378  AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
379  // Single-cycle csel, csinc, csinv, and csneg.
380  CondCycles = 1 + ExtraCondLat;
381  TrueCycles = FalseCycles = 1;
382  if (canFoldIntoCSel(MRI, TrueReg))
383  TrueCycles = 0;
384  else if (canFoldIntoCSel(MRI, FalseReg))
385  FalseCycles = 0;
386  return true;
387  }
388 
389  // Scalar floating point is handled by fcsel.
390  // FIXME: Form fabs, fmin, and fmax when applicable.
391  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
392  AArch64::FPR32RegClass.hasSubClassEq(RC)) {
393  CondCycles = 5 + ExtraCondLat;
394  TrueCycles = FalseCycles = 2;
395  return true;
396  }
397 
398  // Can't do vectors.
399  return false;
400 }
401 
402 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
403                                     MachineBasicBlock::iterator I, DebugLoc DL,
404  unsigned DstReg,
405                                     ArrayRef<MachineOperand> Cond,
406  unsigned TrueReg, unsigned FalseReg) const {
407  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
408 
409  // Parse the condition code, see parseCondBranch() above.
410   AArch64CC::CondCode CC;
411  switch (Cond.size()) {
412  default:
413  llvm_unreachable("Unknown condition opcode in Cond");
414  case 1: // b.cc
415  CC = AArch64CC::CondCode(Cond[0].getImm());
416  break;
417  case 3: { // cbz/cbnz
418  // We must insert a compare against 0.
419  bool Is64Bit;
420  switch (Cond[1].getImm()) {
421  default:
422  llvm_unreachable("Unknown branch opcode in Cond");
423  case AArch64::CBZW:
424  Is64Bit = 0;
425  CC = AArch64CC::EQ;
426  break;
427  case AArch64::CBZX:
428  Is64Bit = 1;
429  CC = AArch64CC::EQ;
430  break;
431  case AArch64::CBNZW:
432  Is64Bit = 0;
433  CC = AArch64CC::NE;
434  break;
435  case AArch64::CBNZX:
436  Is64Bit = 1;
437  CC = AArch64CC::NE;
438  break;
439  }
440  unsigned SrcReg = Cond[2].getReg();
441  if (Is64Bit) {
442  // cmp reg, #0 is actually subs xzr, reg, #0.
443  MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
444  BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
445  .addReg(SrcReg)
446  .addImm(0)
447  .addImm(0);
448  } else {
449  MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
450  BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
451  .addReg(SrcReg)
452  .addImm(0)
453  .addImm(0);
454  }
455  break;
456  }
457  case 4: { // tbz/tbnz
458  // We must insert a tst instruction.
459  switch (Cond[1].getImm()) {
460  default:
461  llvm_unreachable("Unknown branch opcode in Cond");
462  case AArch64::TBZW:
463  case AArch64::TBZX:
464  CC = AArch64CC::EQ;
465  break;
466  case AArch64::TBNZW:
467  case AArch64::TBNZX:
468  CC = AArch64CC::NE;
469  break;
470  }
471  // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
472  if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
473  BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
474  .addReg(Cond[2].getReg())
475  .addImm(
476  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
477  else
478  BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
479  .addReg(Cond[2].getReg())
480  .addImm(
481  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
482  break;
483  }
484  }
485 
486  unsigned Opc = 0;
487  const TargetRegisterClass *RC = nullptr;
488  bool TryFold = false;
489  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
490  RC = &AArch64::GPR64RegClass;
491  Opc = AArch64::CSELXr;
492  TryFold = true;
493  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
494  RC = &AArch64::GPR32RegClass;
495  Opc = AArch64::CSELWr;
496  TryFold = true;
497  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
498  RC = &AArch64::FPR64RegClass;
499  Opc = AArch64::FCSELDrrr;
500  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
501  RC = &AArch64::FPR32RegClass;
502  Opc = AArch64::FCSELSrrr;
503  }
504  assert(RC && "Unsupported regclass");
505 
506  // Try folding simple instructions into the csel.
507  if (TryFold) {
508  unsigned NewVReg = 0;
509  unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
510  if (FoldedOpc) {
511  // The folded opcodes csinc, csinv and csneg apply the operation to
512  // FalseReg, so we need to invert the condition.
513   CC = AArch64CC::getInvertedCondCode(CC);
514  TrueReg = FalseReg;
515  } else
516  FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
517 
518  // Fold the operation. Leave any dead instructions for DCE to clean up.
519  if (FoldedOpc) {
520  FalseReg = NewVReg;
521  Opc = FoldedOpc;
522  // This extends the live range of NewVReg.
523  MRI.clearKillFlags(NewVReg);
524  }
525  }
526 
527  // Pull all virtual registers into the appropriate class.
528  MRI.constrainRegClass(TrueReg, RC);
529  MRI.constrainRegClass(FalseReg, RC);
530 
531  // Insert the csel.
532  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
533  CC);
534 }
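
// Worked example (mine): for Cond = { -1, TBZW, w0, 3 } the code above emits
//     ands wzr, w0, #0x8          ; tst w0, #(1 << 3)
//     csel wDst, wTrue, wFalse, eq
// i.e. the tbz condition is rematerialized as a flag-setting AND followed by
// a csel on EQ.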
535 
536 // FIXME: this implementation should be micro-architecture dependent, so a
537 // micro-architecture target hook should be introduced here in future.
538 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
539  if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53())
540  return MI->isAsCheapAsAMove();
541 
542  switch (MI->getOpcode()) {
543  default:
544  return false;
545 
546  // add/sub on register without shift
547  case AArch64::ADDWri:
548  case AArch64::ADDXri:
549  case AArch64::SUBWri:
550  case AArch64::SUBXri:
551  return (MI->getOperand(3).getImm() == 0);
552 
553  // logical ops on immediate
554  case AArch64::ANDWri:
555  case AArch64::ANDXri:
556  case AArch64::EORWri:
557  case AArch64::EORXri:
558  case AArch64::ORRWri:
559  case AArch64::ORRXri:
560  return true;
561 
562  // logical ops on register without shift
563  case AArch64::ANDWrr:
564  case AArch64::ANDXrr:
565  case AArch64::BICWrr:
566  case AArch64::BICXrr:
567  case AArch64::EONWrr:
568  case AArch64::EONXrr:
569  case AArch64::EORWrr:
570  case AArch64::EORXrr:
571  case AArch64::ORNWrr:
572  case AArch64::ORNXrr:
573  case AArch64::ORRWrr:
574  case AArch64::ORRXrr:
575  return true;
576  }
577 
578  llvm_unreachable("Unknown opcode to check as cheap as a move!");
579 }
580 
581 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
582  unsigned &SrcReg, unsigned &DstReg,
583  unsigned &SubIdx) const {
584  switch (MI.getOpcode()) {
585  default:
586  return false;
587  case AArch64::SBFMXri: // aka sxtw
588  case AArch64::UBFMXri: // aka uxtw
589  // Check for the 32 -> 64 bit extension case, these instructions can do
590  // much more.
591  if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
592  return false;
593  // This is a signed or unsigned 32 -> 64 bit extension.
594  SrcReg = MI.getOperand(1).getReg();
595  DstReg = MI.getOperand(0).getReg();
596  SubIdx = AArch64::sub_32;
597  return true;
598  }
599 }
600 
601 bool
602 AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
603  MachineInstr *MIb,
604  AliasAnalysis *AA) const {
605  const TargetRegisterInfo *TRI = &getRegisterInfo();
606  unsigned BaseRegA = 0, BaseRegB = 0;
607  int OffsetA = 0, OffsetB = 0;
608  int WidthA = 0, WidthB = 0;
609 
610  assert(MIa && MIa->mayLoadOrStore() && "MIa must be a load or store.");
611  assert(MIb && MIb->mayLoadOrStore() && "MIb must be a load or store.");
612 
613  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
614  MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
615  return false;
616 
617  // Retrieve the base register, offset from the base register and width. Width
618  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
619  // base registers are identical, and the offset of a lower memory access +
620  // the width doesn't overlap the offset of a higher memory access,
621  // then the memory accesses are different.
622  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
623  getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
624  if (BaseRegA == BaseRegB) {
625  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
626  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
627  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
628  if (LowOffset + LowWidth <= HighOffset)
629  return true;
630  }
631  }
632  return false;
633 }
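
// A self-contained sketch of the same interval test (mine, kept compiled-out;
// the offset/width values are hypothetical):
#if 0
#include <algorithm>
#include <cassert>

// True when [OffA, OffA + WidthA) and [OffB, OffB + WidthB) cannot overlap,
// assuming both accesses use the same base register.
static bool disjointSameBase(int OffA, int WidthA, int OffB, int WidthB) {
  int LowOffset = std::min(OffA, OffB);
  int HighOffset = std::max(OffA, OffB);
  int LowWidth = (LowOffset == OffA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

int main() {
  assert(disjointSameBase(0, 8, 8, 8));  // str x0, [x8]; str x1, [x8, #8]
  assert(!disjointSameBase(0, 8, 4, 4)); // second store overlaps the first
  return 0;
}
#endif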
634 
635 /// analyzeCompare - For a comparison instruction, return the source registers
636 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
637 /// Return true if the comparison instruction can be analyzed.
638 bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
639  unsigned &SrcReg2, int &CmpMask,
640  int &CmpValue) const {
641  switch (MI->getOpcode()) {
642  default:
643  break;
644  case AArch64::SUBSWrr:
645  case AArch64::SUBSWrs:
646  case AArch64::SUBSWrx:
647  case AArch64::SUBSXrr:
648  case AArch64::SUBSXrs:
649  case AArch64::SUBSXrx:
650  case AArch64::ADDSWrr:
651  case AArch64::ADDSWrs:
652  case AArch64::ADDSWrx:
653  case AArch64::ADDSXrr:
654  case AArch64::ADDSXrs:
655  case AArch64::ADDSXrx:
656  // Replace SUBSWrr with SUBWrr if NZCV is not used.
657  SrcReg = MI->getOperand(1).getReg();
658  SrcReg2 = MI->getOperand(2).getReg();
659  CmpMask = ~0;
660  CmpValue = 0;
661  return true;
662  case AArch64::SUBSWri:
663  case AArch64::ADDSWri:
664  case AArch64::SUBSXri:
665  case AArch64::ADDSXri:
666  SrcReg = MI->getOperand(1).getReg();
667  SrcReg2 = 0;
668  CmpMask = ~0;
669  // FIXME: In order to convert CmpValue to 0 or 1
670  CmpValue = (MI->getOperand(2).getImm() != 0);
671  return true;
672  case AArch64::ANDSWri:
673  case AArch64::ANDSXri:
674  // ANDS does not use the same encoding scheme as the other xxxS
675  // instructions.
676  SrcReg = MI->getOperand(1).getReg();
677  SrcReg2 = 0;
678  CmpMask = ~0;
679  // FIXME: The return val type of decodeLogicalImmediate is uint64_t,
680  // while the type of CmpValue is int. When converting uint64_t to int,
681  // the high 32 bits of uint64_t will be lost.
682  // In fact it causes a bug in spec2006-483.xalancbmk
683  // CmpValue is only used to compare with zero in OptimizeCompareInstr
684   CmpValue = (AArch64_AM::decodeLogicalImmediate(
685  MI->getOperand(2).getImm(),
686  MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
687  return true;
688  }
689 
690  return false;
691 }
692 
693 static bool UpdateOperandRegClass(MachineInstr *Instr) {
694  MachineBasicBlock *MBB = Instr->getParent();
695  assert(MBB && "Can't get MachineBasicBlock here");
696  MachineFunction *MF = MBB->getParent();
697  assert(MF && "Can't get MachineFunction here");
698  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
699  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
700  MachineRegisterInfo *MRI = &MF->getRegInfo();
701 
702  for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
703  ++OpIdx) {
704  MachineOperand &MO = Instr->getOperand(OpIdx);
705  const TargetRegisterClass *OpRegCstraints =
706  Instr->getRegClassConstraint(OpIdx, TII, TRI);
707 
708  // If there's no constraint, there's nothing to do.
709  if (!OpRegCstraints)
710  continue;
711  // If the operand is a frame index, there's nothing to do here.
712  // A frame index operand will resolve correctly during PEI.
713  if (MO.isFI())
714  continue;
715 
716  assert(MO.isReg() &&
717  "Operand has register constraints without being a register!");
718 
719  unsigned Reg = MO.getReg();
720   if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
721  if (!OpRegCstraints->contains(Reg))
722  return false;
723  } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
724  !MRI->constrainRegClass(Reg, OpRegCstraints))
725  return false;
726  }
727 
728  return true;
729 }
730 
731 /// \brief Return the opcode that does not set flags when possible - otherwise
732 /// return the original opcode. The caller is responsible for performing the
733 /// actual substitution and legality checking.
734 static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
735  // Don't convert all compare instructions, because for some the zero register
736  // encoding becomes the sp register.
737  bool MIDefinesZeroReg = false;
738  if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR))
739  MIDefinesZeroReg = true;
740 
741  switch (MI->getOpcode()) {
742  default:
743  return MI->getOpcode();
744  case AArch64::ADDSWrr:
745  return AArch64::ADDWrr;
746  case AArch64::ADDSWri:
747  return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
748  case AArch64::ADDSWrs:
749  return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
750  case AArch64::ADDSWrx:
751  return AArch64::ADDWrx;
752  case AArch64::ADDSXrr:
753  return AArch64::ADDXrr;
754  case AArch64::ADDSXri:
755  return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
756  case AArch64::ADDSXrs:
757  return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
758  case AArch64::ADDSXrx:
759  return AArch64::ADDXrx;
760  case AArch64::SUBSWrr:
761  return AArch64::SUBWrr;
762  case AArch64::SUBSWri:
763  return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
764  case AArch64::SUBSWrs:
765  return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
766  case AArch64::SUBSWrx:
767  return AArch64::SUBWrx;
768  case AArch64::SUBSXrr:
769  return AArch64::SUBXrr;
770  case AArch64::SUBSXri:
771  return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
772  case AArch64::SUBSXrs:
773  return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
774  case AArch64::SUBSXrx:
775  return AArch64::SUBXrx;
776  }
777 }
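
// Example of the zero-register subtlety (my illustration): "cmp w0, #1" is
// SUBSWri with WZR as the destination. In the non-flag-setting SUBWri
// encoding, destination register 31 means WSP rather than WZR, so dropping
// the S would retarget the write to the stack pointer; hence the ri/rs forms
// above keep the flag-setting opcode when WZR/XZR is defined.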
778 
779 /// True when the condition code could be modified on the instruction
780 /// trace starting at \p From and ending at \p To.
781 static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To,
782  const bool CheckOnlyCCWrites,
783  const TargetRegisterInfo *TRI) {
784  // We iterate backward starting at \p To until we hit \p From.
785  MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();
786 
787  // Early exit if To is at the beginning of the BB.
788  if (I == B)
789  return true;
790 
791  // Check whether the definition of SrcReg is in the same basic block as
792  // Compare. If not, assume the condition code gets modified on some path.
793  if (To->getParent() != From->getParent())
794  return true;
795 
796  // Check that NZCV isn't set on the trace.
797  for (--I; I != E; --I) {
798  const MachineInstr &Instr = *I;
799 
800  if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
801  (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI)))
802  // This instruction modifies or uses NZCV after the one we want to
803  // change.
804  return true;
805  if (I == B)
806  // We currently don't allow the instruction trace to cross basic
807  // block boundaries
808  return true;
809  }
810  return false;
811 }
812 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
813 /// comparison into one that sets the zero bit in the flags register.
814 bool AArch64InstrInfo::optimizeCompareInstr(
815  MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
816  int CmpValue, const MachineRegisterInfo *MRI) const {
817 
818  // Replace SUBSWrr with SUBWrr if NZCV is not used.
819  int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
820  if (Cmp_NZCV != -1) {
821  if (CmpInstr->definesRegister(AArch64::WZR) ||
822  CmpInstr->definesRegister(AArch64::XZR)) {
823  CmpInstr->eraseFromParent();
824  return true;
825  }
826  unsigned Opc = CmpInstr->getOpcode();
827  unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
828  if (NewOpc == Opc)
829  return false;
830  const MCInstrDesc &MCID = get(NewOpc);
831  CmpInstr->setDesc(MCID);
832  CmpInstr->RemoveOperand(Cmp_NZCV);
833  bool succeeded = UpdateOperandRegClass(CmpInstr);
834  (void)succeeded;
835  assert(succeeded && "Some operands reg class are incompatible!");
836  return true;
837  }
838 
839  // Continue only if we have a "ri" where immediate is zero.
840  // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
841  // function.
842  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
843  if (CmpValue != 0 || SrcReg2 != 0)
844  return false;
845 
846  // CmpInstr is a Compare instruction if destination register is not used.
847  if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
848  return false;
849 
850  // Get the unique definition of SrcReg.
851  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
852  if (!MI)
853  return false;
854 
855  bool CheckOnlyCCWrites = false;
856  const TargetRegisterInfo *TRI = &getRegisterInfo();
857  if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI))
858  return false;
859 
860  unsigned NewOpc = MI->getOpcode();
861  switch (MI->getOpcode()) {
862  default:
863  return false;
864  case AArch64::ADDSWrr:
865  case AArch64::ADDSWri:
866  case AArch64::ADDSXrr:
867  case AArch64::ADDSXri:
868  case AArch64::SUBSWrr:
869  case AArch64::SUBSWri:
870  case AArch64::SUBSXrr:
871  case AArch64::SUBSXri:
872  break;
873  case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break;
874  case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break;
875  case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break;
876  case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break;
877  case AArch64::ADCWr: NewOpc = AArch64::ADCSWr; break;
878  case AArch64::ADCXr: NewOpc = AArch64::ADCSXr; break;
879  case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break;
880  case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break;
881  case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break;
882  case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break;
883  case AArch64::SBCWr: NewOpc = AArch64::SBCSWr; break;
884  case AArch64::SBCXr: NewOpc = AArch64::SBCSXr; break;
885  case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break;
886  case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break;
887  }
888 
889  // Scan forward for the use of NZCV.
890  // When checking against MI: if the condition code requires checking of the
891  // V bit, then this is not safe to do.
892  // It is safe to remove CmpInstr if NZCV is redefined or killed.
893  // If we are done with the basic block, we need to check whether NZCV is
894  // live-out.
895  bool IsSafe = false;
896  for (MachineBasicBlock::iterator I = CmpInstr,
897  E = CmpInstr->getParent()->end();
898  !IsSafe && ++I != E;) {
899  const MachineInstr &Instr = *I;
900  for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
901  ++IO) {
902  const MachineOperand &MO = Instr.getOperand(IO);
903  if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
904  IsSafe = true;
905  break;
906  }
907  if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
908  continue;
909  if (MO.isDef()) {
910  IsSafe = true;
911  break;
912  }
913 
914  // Decode the condition code.
915  unsigned Opc = Instr.getOpcode();
916   AArch64CC::CondCode CC;
917  switch (Opc) {
918  default:
919  return false;
920  case AArch64::Bcc:
921  CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
922  break;
923  case AArch64::CSINVWr:
924  case AArch64::CSINVXr:
925  case AArch64::CSINCWr:
926  case AArch64::CSINCXr:
927  case AArch64::CSELWr:
928  case AArch64::CSELXr:
929  case AArch64::CSNEGWr:
930  case AArch64::CSNEGXr:
931  case AArch64::FCSELSrrr:
932  case AArch64::FCSELDrrr:
933  CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
934  break;
935  }
936 
937  // It is not safe to remove the Compare instruction if Overflow(V) is used.
938  switch (CC) {
939  default:
940  // NZCV can be used multiple times, we should continue.
941  break;
942  case AArch64CC::VS:
943  case AArch64CC::VC:
944  case AArch64CC::GE:
945  case AArch64CC::LT:
946  case AArch64CC::GT:
947  case AArch64CC::LE:
948  return false;
949  }
950  }
951  }
952 
953  // If NZCV is not killed nor re-defined, we should check whether it is
954  // live-out. If it is live-out, do not optimize.
955  if (!IsSafe) {
956  MachineBasicBlock *ParentBlock = CmpInstr->getParent();
957  for (auto *MBB : ParentBlock->successors())
958  if (MBB->isLiveIn(AArch64::NZCV))
959  return false;
960  }
961 
962  // Update the instruction to set NZCV.
963  MI->setDesc(get(NewOpc));
964  CmpInstr->eraseFromParent();
965  bool succeeded = UpdateOperandRegClass(MI);
966  (void)succeeded;
967  assert(succeeded && "Some operands reg class are incompatible!");
968   MI->addRegisterDefined(AArch64::NZCV, TRI);
969  return true;
970 }
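
// End-to-end example of the second transformation (my illustration):
//     sub  w8, w9, w10
//     subs wzr, w8, #0        ; cmp w8, #0
//     b.ne LBB0_2
// The compare is against zero and the branch only reads N/Z, so the sub is
// rewritten to subs and the cmp is erased.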
971 
972 bool
973 AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
974  if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
975  return false;
976 
977  MachineBasicBlock &MBB = *MI->getParent();
978  DebugLoc DL = MI->getDebugLoc();
979  unsigned Reg = MI->getOperand(0).getReg();
980  const GlobalValue *GV =
981  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
982  const TargetMachine &TM = MBB.getParent()->getTarget();
983  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
984  const unsigned char MO_NC = AArch64II::MO_NC;
985 
986  if ((OpFlags & AArch64II::MO_GOT) != 0) {
987  BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
988  .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
989  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
990  .addReg(Reg, RegState::Kill).addImm(0)
991  .addMemOperand(*MI->memoperands_begin());
992  } else if (TM.getCodeModel() == CodeModel::Large) {
993  BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
994   .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
995  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
996  .addReg(Reg, RegState::Kill)
997  .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
998  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
999  .addReg(Reg, RegState::Kill)
1000  .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
1001  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1002  .addReg(Reg, RegState::Kill)
1003  .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
1004  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1005  .addReg(Reg, RegState::Kill).addImm(0)
1006  .addMemOperand(*MI->memoperands_begin());
1007  } else {
1008  BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1009  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1010  unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1011  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1012  .addReg(Reg, RegState::Kill)
1013  .addGlobalAddress(GV, 0, LoFlags)
1014  .addMemOperand(*MI->memoperands_begin());
1015  }
1016 
1017  MBB.erase(MI);
1018 
1019  return true;
1020 }
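
// A self-contained sketch (mine, kept compiled-out) of the large-code-model
// materialization above: MOVZ seeds bits 48-63 (MO_G3) and three MOVKs patch
// in bits 32-47, 16-31 and 0-15 (MO_G2/MO_G1/MO_G0). The address value is
// hypothetical.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Addr = 0x0123456789abcdefULL; // stand-in for &GV
  uint16_t G3 = Addr >> 48, G2 = Addr >> 32, G1 = Addr >> 16, G0 = Addr;
  // movz x0, #G3, lsl #48 ; movk x0, #G2, lsl #32
  // movk x0, #G1, lsl #16 ; movk x0, #G0
  uint64_t Rebuilt = ((uint64_t)G3 << 48) | ((uint64_t)G2 << 32) |
                     ((uint64_t)G1 << 16) | G0;
  assert(Rebuilt == Addr);
  return 0;
}
#endif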
1021 
1022 /// Return true if this instruction has a non-zero immediate.
1023 bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
1024  switch (MI->getOpcode()) {
1025  default:
1026  break;
1027  case AArch64::ADDSWrs:
1028  case AArch64::ADDSXrs:
1029  case AArch64::ADDWrs:
1030  case AArch64::ADDXrs:
1031  case AArch64::ANDSWrs:
1032  case AArch64::ANDSXrs:
1033  case AArch64::ANDWrs:
1034  case AArch64::ANDXrs:
1035  case AArch64::BICSWrs:
1036  case AArch64::BICSXrs:
1037  case AArch64::BICWrs:
1038  case AArch64::BICXrs:
1039  case AArch64::CRC32Brr:
1040  case AArch64::CRC32CBrr:
1041  case AArch64::CRC32CHrr:
1042  case AArch64::CRC32CWrr:
1043  case AArch64::CRC32CXrr:
1044  case AArch64::CRC32Hrr:
1045  case AArch64::CRC32Wrr:
1046  case AArch64::CRC32Xrr:
1047  case AArch64::EONWrs:
1048  case AArch64::EONXrs:
1049  case AArch64::EORWrs:
1050  case AArch64::EORXrs:
1051  case AArch64::ORNWrs:
1052  case AArch64::ORNXrs:
1053  case AArch64::ORRWrs:
1054  case AArch64::ORRXrs:
1055  case AArch64::SUBSWrs:
1056  case AArch64::SUBSXrs:
1057  case AArch64::SUBWrs:
1058  case AArch64::SUBXrs:
1059  if (MI->getOperand(3).isImm()) {
1060  unsigned val = MI->getOperand(3).getImm();
1061  return (val != 0);
1062  }
1063  break;
1064  }
1065  return false;
1066 }
1067 
1068 /// Return true if this instruction has a non-zero immediate.
1069 bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
1070  switch (MI->getOpcode()) {
1071  default:
1072  break;
1073  case AArch64::ADDSWrx:
1074  case AArch64::ADDSXrx:
1075  case AArch64::ADDSXrx64:
1076  case AArch64::ADDWrx:
1077  case AArch64::ADDXrx:
1078  case AArch64::ADDXrx64:
1079  case AArch64::SUBSWrx:
1080  case AArch64::SUBSXrx:
1081  case AArch64::SUBSXrx64:
1082  case AArch64::SUBWrx:
1083  case AArch64::SUBXrx:
1084  case AArch64::SUBXrx64:
1085  if (MI->getOperand(3).isImm()) {
1086  unsigned val = MI->getOperand(3).getImm();
1087  return (val != 0);
1088  }
1089  break;
1090  }
1091 
1092  return false;
1093 }
1094 
1095 // Return true if this instruction simply sets its single destination register
1096 // to zero. This is equivalent to a register rename of the zero-register.
1097 bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
1098  switch (MI->getOpcode()) {
1099  default:
1100  break;
1101  case AArch64::MOVZWi:
1102  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1103  if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
1104  assert(MI->getDesc().getNumOperands() == 3 &&
1105  MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1106  return true;
1107  }
1108  break;
1109  case AArch64::ANDWri: // and Rd, Rzr, #imm
1110  return MI->getOperand(1).getReg() == AArch64::WZR;
1111  case AArch64::ANDXri:
1112  return MI->getOperand(1).getReg() == AArch64::XZR;
1113  case TargetOpcode::COPY:
1114  return MI->getOperand(1).getReg() == AArch64::WZR;
1115  }
1116  return false;
1117 }
1118 
1119 // Return true if this instruction simply renames a general register without
1120 // modifying bits.
1121 bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
1122  switch (MI->getOpcode()) {
1123  default:
1124  break;
1125  case TargetOpcode::COPY: {
1126  // GPR32 copies will be lowered to ORRXrs
1127  unsigned DstReg = MI->getOperand(0).getReg();
1128  return (AArch64::GPR32RegClass.contains(DstReg) ||
1129  AArch64::GPR64RegClass.contains(DstReg));
1130  }
1131  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1132  if (MI->getOperand(1).getReg() == AArch64::XZR) {
1133  assert(MI->getDesc().getNumOperands() == 4 &&
1134  MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1135  return true;
1136  }
1137  break;
1138  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1139  if (MI->getOperand(2).getImm() == 0) {
1140  assert(MI->getDesc().getNumOperands() == 4 &&
1141  MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1142  return true;
1143  }
1144  break;
1145  }
1146  return false;
1147 }
1148 
1149 // Return true if this instruction simply renames an FP register without
1150 // modifying bits.
1151 bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
1152  switch (MI->getOpcode()) {
1153  default:
1154  break;
1155  case TargetOpcode::COPY: {
1156  // FPR64 copies will be lowered to ORR.16b
1157  unsigned DstReg = MI->getOperand(0).getReg();
1158  return (AArch64::FPR64RegClass.contains(DstReg) ||
1159  AArch64::FPR128RegClass.contains(DstReg));
1160  }
1161  case AArch64::ORRv16i8:
1162  if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
1163  assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
1164  "invalid ORRv16i8 operands");
1165  return true;
1166  }
1167  break;
1168  }
1169  return false;
1170 }
1171 
1172 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
1173  int &FrameIndex) const {
1174  switch (MI->getOpcode()) {
1175  default:
1176  break;
1177  case AArch64::LDRWui:
1178  case AArch64::LDRXui:
1179  case AArch64::LDRBui:
1180  case AArch64::LDRHui:
1181  case AArch64::LDRSui:
1182  case AArch64::LDRDui:
1183  case AArch64::LDRQui:
1184  if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
1185  MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
1186  FrameIndex = MI->getOperand(1).getIndex();
1187  return MI->getOperand(0).getReg();
1188  }
1189  break;
1190  }
1191 
1192  return 0;
1193 }
1194 
1195 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
1196  int &FrameIndex) const {
1197  switch (MI->getOpcode()) {
1198  default:
1199  break;
1200  case AArch64::STRWui:
1201  case AArch64::STRXui:
1202  case AArch64::STRBui:
1203  case AArch64::STRHui:
1204  case AArch64::STRSui:
1205  case AArch64::STRDui:
1206  case AArch64::STRQui:
1207  if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
1208  MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
1209  FrameIndex = MI->getOperand(1).getIndex();
1210  return MI->getOperand(0).getReg();
1211  }
1212  break;
1213  }
1214  return 0;
1215 }
1216 
1217 /// Return true if this load/store scales or extends its register offset.
1218 /// This refers to scaling a dynamic index as opposed to scaled immediates.
1219 /// MI should be a memory op that allows scaled addressing.
1220 bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
1221  switch (MI->getOpcode()) {
1222  default:
1223  break;
1224  case AArch64::LDRBBroW:
1225  case AArch64::LDRBroW:
1226  case AArch64::LDRDroW:
1227  case AArch64::LDRHHroW:
1228  case AArch64::LDRHroW:
1229  case AArch64::LDRQroW:
1230  case AArch64::LDRSBWroW:
1231  case AArch64::LDRSBXroW:
1232  case AArch64::LDRSHWroW:
1233  case AArch64::LDRSHXroW:
1234  case AArch64::LDRSWroW:
1235  case AArch64::LDRSroW:
1236  case AArch64::LDRWroW:
1237  case AArch64::LDRXroW:
1238  case AArch64::STRBBroW:
1239  case AArch64::STRBroW:
1240  case AArch64::STRDroW:
1241  case AArch64::STRHHroW:
1242  case AArch64::STRHroW:
1243  case AArch64::STRQroW:
1244  case AArch64::STRSroW:
1245  case AArch64::STRWroW:
1246  case AArch64::STRXroW:
1247  case AArch64::LDRBBroX:
1248  case AArch64::LDRBroX:
1249  case AArch64::LDRDroX:
1250  case AArch64::LDRHHroX:
1251  case AArch64::LDRHroX:
1252  case AArch64::LDRQroX:
1253  case AArch64::LDRSBWroX:
1254  case AArch64::LDRSBXroX:
1255  case AArch64::LDRSHWroX:
1256  case AArch64::LDRSHXroX:
1257  case AArch64::LDRSWroX:
1258  case AArch64::LDRSroX:
1259  case AArch64::LDRWroX:
1260  case AArch64::LDRXroX:
1261  case AArch64::STRBBroX:
1262  case AArch64::STRBroX:
1263  case AArch64::STRDroX:
1264  case AArch64::STRHHroX:
1265  case AArch64::STRHroX:
1266  case AArch64::STRQroX:
1267  case AArch64::STRSroX:
1268  case AArch64::STRWroX:
1269  case AArch64::STRXroX:
1270 
1271  unsigned Val = MI->getOperand(3).getImm();
1272   AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1273  return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1274  }
1275  return false;
1276 }
1277 
1278 /// Check all MachineMemOperands for a hint to suppress pairing.
1279 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
1280  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
1281  "Too many target MO flags");
1282  for (auto *MM : MI->memoperands()) {
1283  if (MM->getFlags() &
1284  (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
1285  return true;
1286  }
1287  }
1288  return false;
1289 }
1290 
1291 /// Set a flag on the first MachineMemOperand to suppress pairing.
1292 void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
1293  if (MI->memoperands_empty())
1294  return;
1295 
1296  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
1297  "Too many target MO flags");
1298  (*MI->memoperands_begin())
1299  ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
1300 }
1301 
1302 bool
1303 AArch64InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
1304  unsigned &Offset,
1305  const TargetRegisterInfo *TRI) const {
1306  switch (LdSt->getOpcode()) {
1307  default:
1308  return false;
1309  case AArch64::STRSui:
1310  case AArch64::STRDui:
1311  case AArch64::STRQui:
1312  case AArch64::STRXui:
1313  case AArch64::STRWui:
1314  case AArch64::LDRSui:
1315  case AArch64::LDRDui:
1316  case AArch64::LDRQui:
1317  case AArch64::LDRXui:
1318  case AArch64::LDRWui:
1319  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
1320  return false;
1321  BaseReg = LdSt->getOperand(1).getReg();
1322  MachineFunction &MF = *LdSt->getParent()->getParent();
1323  unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
1324  Offset = LdSt->getOperand(2).getImm() * Width;
1325  return true;
1326  };
1327 }
1328 
1329 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
1330  MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
1331  const TargetRegisterInfo *TRI) const {
1332  // Handle only loads/stores with base register followed by immediate offset.
1333  if (LdSt->getNumOperands() != 3)
1334  return false;
1335  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
1336  return false;
1337 
1338  // Offset is calculated as the immediate operand multiplied by the scaling factor.
1339  // Unscaled instructions have scaling factor set to 1.
1340  int Scale = 0;
1341  switch (LdSt->getOpcode()) {
1342  default:
1343  return false;
1344  case AArch64::LDURQi:
1345  case AArch64::STURQi:
1346  Width = 16;
1347  Scale = 1;
1348  break;
1349  case AArch64::LDURXi:
1350  case AArch64::LDURDi:
1351  case AArch64::STURXi:
1352  case AArch64::STURDi:
1353  Width = 8;
1354  Scale = 1;
1355  break;
1356  case AArch64::LDURWi:
1357  case AArch64::LDURSi:
1358  case AArch64::LDURSWi:
1359  case AArch64::STURWi:
1360  case AArch64::STURSi:
1361  Width = 4;
1362  Scale = 1;
1363  break;
1364  case AArch64::LDURHi:
1365  case AArch64::LDURHHi:
1366  case AArch64::LDURSHXi:
1367  case AArch64::LDURSHWi:
1368  case AArch64::STURHi:
1369  case AArch64::STURHHi:
1370  Width = 2;
1371  Scale = 1;
1372  break;
1373  case AArch64::LDURBi:
1374  case AArch64::LDURBBi:
1375  case AArch64::LDURSBXi:
1376  case AArch64::LDURSBWi:
1377  case AArch64::STURBi:
1378  case AArch64::STURBBi:
1379  Width = 1;
1380  Scale = 1;
1381  break;
1382  case AArch64::LDRXui:
1383  case AArch64::STRXui:
1384  Scale = Width = 8;
1385  break;
1386  case AArch64::LDRWui:
1387  case AArch64::STRWui:
1388  Scale = Width = 4;
1389  break;
1390  case AArch64::LDRBui:
1391  case AArch64::STRBui:
1392  Scale = Width = 1;
1393  break;
1394  case AArch64::LDRHui:
1395  case AArch64::STRHui:
1396  Scale = Width = 2;
1397  break;
1398  case AArch64::LDRSui:
1399  case AArch64::STRSui:
1400  Scale = Width = 4;
1401  break;
1402  case AArch64::LDRDui:
1403  case AArch64::STRDui:
1404  Scale = Width = 8;
1405  break;
1406  case AArch64::LDRQui:
1407  case AArch64::STRQui:
1408  Scale = Width = 16;
1409  break;
1410  case AArch64::LDRBBui:
1411  case AArch64::STRBBui:
1412  Scale = Width = 1;
1413  break;
1414  case AArch64::LDRHHui:
1415  case AArch64::STRHHui:
1416  Scale = Width = 2;
1417  break;
1418  };
1419 
1420  BaseReg = LdSt->getOperand(1).getReg();
1421  Offset = LdSt->getOperand(2).getImm() * Scale;
1422  return true;
1423 }
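
// A minimal sketch (mine, kept compiled-out) of the scaling rule above:
// scaled forms multiply the immediate by the access width, unscaled
// (LDUR/STUR) forms use it directly, so both loads below address byte
// offset 24.
#if 0
#include <cassert>

static int byteOffset(int Imm, int Scale) { return Imm * Scale; }

int main() {
  assert(byteOffset(3, 8) == 24);  // ldr  x0, [x8, #24]  (LDRXui, imm = 3)
  assert(byteOffset(24, 1) == 24); // ldur x0, [x8, #24]  (LDURXi, imm = 24)
  return 0;
}
#endif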
1424 
1425 /// Detect opportunities for ldp/stp formation.
1426 ///
1427 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
1428 bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
1429  MachineInstr *SecondLdSt,
1430  unsigned NumLoads) const {
1431  // Only cluster up to a single pair.
1432  if (NumLoads > 1)
1433  return false;
1434  if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
1435  return false;
1436  // getMemOpBaseRegImmOfs guarantees that operand 2 isImm.
1437  unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
1438  // Allow 6 bits of positive range.
1439  if (Ofs1 > 64)
1440  return false;
1441  // The caller should already have ordered First/SecondLdSt by offset.
1442  unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
1443  return Ofs1 + 1 == Ofs2;
1444 }
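
// Example (mine): "ldr x0, [x8, #8]" and "ldr x1, [x8, #16]" carry scaled
// immediates 1 and 2, so Ofs1 + 1 == Ofs2 holds and the pair is a candidate
// for ldp formation; a gap between the offsets, or Ofs1 above 64, rejects it.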
1445 
1446 bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
1447  MachineInstr *Second) const {
1448  // Cyclone can fuse CMN, CMP followed by Bcc.
1449 
1450  // FIXME: B0 can also fuse:
1451  // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ.
1452  if (Second->getOpcode() != AArch64::Bcc)
1453  return false;
1454  switch (First->getOpcode()) {
1455  default:
1456  return false;
1457  case AArch64::SUBSWri:
1458  case AArch64::ADDSWri:
1459  case AArch64::ANDSWri:
1460  case AArch64::SUBSXri:
1461  case AArch64::ADDSXri:
1462  case AArch64::ANDSXri:
1463  return true;
1464  }
1465 }
1466 
1467 MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
1468  MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
1469  const MDNode *Expr, DebugLoc DL) const {
1470  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
1471  .addFrameIndex(FrameIx)
1472  .addImm(0)
1473  .addImm(Offset)
1474  .addMetadata(Var)
1475  .addMetadata(Expr);
1476  return &*MIB;
1477 }
1478 
1479 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
1480  unsigned Reg, unsigned SubIdx,
1481  unsigned State,
1482  const TargetRegisterInfo *TRI) {
1483  if (!SubIdx)
1484  return MIB.addReg(Reg, State);
1485 
1486   if (TargetRegisterInfo::isPhysicalRegister(Reg))
1487  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1488  return MIB.addReg(Reg, State, SubIdx);
1489 }
1490 
1491 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
1492  unsigned NumRegs) {
1493  // We really want the positive remainder mod 32 here; that happens to be
1494  // easily obtainable with a mask.
1495  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
1496 }
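
// A self-contained sketch (mine, kept compiled-out) of the wrap-around test,
// using raw register encodings 0-31. Copying d30_d31_d0_d1 into d1_d2_d3_d4
// must iterate backwards, because writing d1 first would clobber a source.
#if 0
#include <cassert>

static bool clobbers(unsigned DestReg, unsigned SrcReg, unsigned NumRegs) {
  return ((DestReg - SrcReg) & 0x1f) < NumRegs; // positive remainder mod 32
}

int main() {
  assert(clobbers(1, 30, 4));  // (1 - 30) mod 32 == 3  < 4 -> copy backwards
  assert(!clobbers(10, 2, 4)); // (10 - 2) mod 32 == 8 >= 4 -> forward is safe
  return 0;
}
#endif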
1497 
1498 void AArch64InstrInfo::copyPhysRegTuple(
1499     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
1500  unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
1501  llvm::ArrayRef<unsigned> Indices) const {
1502  assert(Subtarget.hasNEON() &&
1503  "Unexpected register copy without NEON");
1504  const TargetRegisterInfo *TRI = &getRegisterInfo();
1505  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
1506  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
1507  unsigned NumRegs = Indices.size();
1508 
1509  int SubReg = 0, End = NumRegs, Incr = 1;
1510  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
1511  SubReg = NumRegs - 1;
1512  End = -1;
1513  Incr = -1;
1514  }
1515 
1516  for (; SubReg != End; SubReg += Incr) {
1517  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
1518  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
1519  AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
1520  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
1521  }
1522 }
1523 
1524 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
1525                                    MachineBasicBlock::iterator I, DebugLoc DL,
1526  unsigned DestReg, unsigned SrcReg,
1527  bool KillSrc) const {
1528  if (AArch64::GPR32spRegClass.contains(DestReg) &&
1529  (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
1530  const TargetRegisterInfo *TRI = &getRegisterInfo();
1531 
1532  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
1533  // If either operand is WSP, expand to ADD #0.
1534  if (Subtarget.hasZeroCycleRegMove()) {
1535  // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
1536  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1537  &AArch64::GPR64spRegClass);
1538  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1539  &AArch64::GPR64spRegClass);
1540  // This instruction is reading and writing X registers. This may upset
1541  // the register scavenger and machine verifier, so we need to indicate
1542  // that we are reading an undefined value from SrcRegX, but a proper
1543  // value from SrcReg.
1544  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
1545  .addReg(SrcRegX, RegState::Undef)
1546  .addImm(0)
1547   .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1548  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1549  } else {
1550  BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
1551  .addReg(SrcReg, getKillRegState(KillSrc))
1552  .addImm(0)
1553   .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1554  }
1555  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
1556  BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
1557   AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1558  } else {
1559  if (Subtarget.hasZeroCycleRegMove()) {
1560  // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
1561  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1562  &AArch64::GPR64spRegClass);
1563  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1564  &AArch64::GPR64spRegClass);
1565  // This instruction is reading and writing X registers. This may upset
1566  // the register scavenger and machine verifier, so we need to indicate
1567  // that we are reading an undefined value from SrcRegX, but a proper
1568  // value from SrcReg.
1569  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
1570  .addReg(AArch64::XZR)
1571  .addReg(SrcRegX, RegState::Undef)
1572  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1573  } else {
1574  // Otherwise, expand to ORR WZR.
1575  BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
1576  .addReg(AArch64::WZR)
1577  .addReg(SrcReg, getKillRegState(KillSrc));
1578  }
1579  }
1580  return;
1581  }
1582 
1583  if (AArch64::GPR64spRegClass.contains(DestReg) &&
1584  (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
1585  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
1586  // If either operand is SP, expand to ADD #0.
1587  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
1588  .addReg(SrcReg, getKillRegState(KillSrc))
1589  .addImm(0)
1590   .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1591  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
1592  BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
1593   AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1594  } else {
1595  // Otherwise, expand to ORR XZR.
1596  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
1597  .addReg(AArch64::XZR)
1598  .addReg(SrcReg, getKillRegState(KillSrc));
1599  }
1600  return;
1601  }
1602 
1603  // Copy a DDDD register quad by copying the individual sub-registers.
1604  if (AArch64::DDDDRegClass.contains(DestReg) &&
1605  AArch64::DDDDRegClass.contains(SrcReg)) {
1606  static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
1607  AArch64::dsub2, AArch64::dsub3 };
1608  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1609  Indices);
1610  return;
1611  }
1612 
1613  // Copy a DDD register triple by copying the individual sub-registers.
1614  if (AArch64::DDDRegClass.contains(DestReg) &&
1615  AArch64::DDDRegClass.contains(SrcReg)) {
1616  static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
1617  AArch64::dsub2 };
1618  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1619  Indices);
1620  return;
1621  }
1622 
1623  // Copy a DD register pair by copying the individual sub-registers.
1624  if (AArch64::DDRegClass.contains(DestReg) &&
1625  AArch64::DDRegClass.contains(SrcReg)) {
1626  static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
1627  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1628  Indices);
1629  return;
1630  }
1631 
1632  // Copy a QQQQ register quad by copying the individual sub-registers.
1633  if (AArch64::QQQQRegClass.contains(DestReg) &&
1634  AArch64::QQQQRegClass.contains(SrcReg)) {
1635  static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
1636  AArch64::qsub2, AArch64::qsub3 };
1637  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
1638  Indices);
1639  return;
1640  }
1641 
1642  // Copy a QQQ register triple by copying the individual sub-registers.
1643  if (AArch64::QQQRegClass.contains(DestReg) &&
1644  AArch64::QQQRegClass.contains(SrcReg)) {
1645  static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
1646  AArch64::qsub2 };
1647  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
1648  Indices);
1649  return;
1650  }
1651 
1652  // Copy a QQ register pair by copying the individual sub-registers.
1653  if (AArch64::QQRegClass.contains(DestReg) &&
1654  AArch64::QQRegClass.contains(SrcReg)) {
1655  static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
1656  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
1657  Indices);
1658  return;
1659  }
1660 
1661  if (AArch64::FPR128RegClass.contains(DestReg) &&
1662  AArch64::FPR128RegClass.contains(SrcReg)) {
1663  if (Subtarget.hasNEON()) {
1664  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
1665  .addReg(SrcReg)
1666  .addReg(SrcReg, getKillRegState(KillSrc));
1667  } else {
1668  BuildMI(MBB, I, DL, get(AArch64::STRQpre))
1669  .addReg(AArch64::SP, RegState::Define)
1670  .addReg(SrcReg, getKillRegState(KillSrc))
1671  .addReg(AArch64::SP)
1672  .addImm(-16);
1673  BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
1674  .addReg(AArch64::SP, RegState::Define)
1675  .addReg(DestReg, RegState::Define)
1676  .addReg(AArch64::SP)
1677  .addImm(16);
1678  }
1679  return;
1680  }
1681 
1682  if (AArch64::FPR64RegClass.contains(DestReg) &&
1683  AArch64::FPR64RegClass.contains(SrcReg)) {
1684  if (Subtarget.hasNEON()) {
1685  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
1686  &AArch64::FPR128RegClass);
1687  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
1688  &AArch64::FPR128RegClass);
1689  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
1690  .addReg(SrcReg)
1691  .addReg(SrcReg, getKillRegState(KillSrc));
1692  } else {
1693  BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
1694  .addReg(SrcReg, getKillRegState(KillSrc));
1695  }
1696  return;
1697  }
1698 
1699  if (AArch64::FPR32RegClass.contains(DestReg) &&
1700  AArch64::FPR32RegClass.contains(SrcReg)) {
1701  if (Subtarget.hasNEON()) {
1702  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
1703  &AArch64::FPR128RegClass);
1704  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
1705  &AArch64::FPR128RegClass);
1706  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
1707  .addReg(SrcReg)
1708  .addReg(SrcReg, getKillRegState(KillSrc));
1709  } else {
1710  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
1711  .addReg(SrcReg, getKillRegState(KillSrc));
1712  }
1713  return;
1714  }
1715 
1716  if (AArch64::FPR16RegClass.contains(DestReg) &&
1717  AArch64::FPR16RegClass.contains(SrcReg)) {
1718  if (Subtarget.hasNEON()) {
1719  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
1720  &AArch64::FPR128RegClass);
1721  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
1722  &AArch64::FPR128RegClass);
1723  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
1724  .addReg(SrcReg)
1725  .addReg(SrcReg, getKillRegState(KillSrc));
1726  } else {
1727  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
1728  &AArch64::FPR32RegClass);
1729  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
1730  &AArch64::FPR32RegClass);
1731  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
1732  .addReg(SrcReg, getKillRegState(KillSrc));
1733  }
1734  return;
1735  }
1736 
1737  if (AArch64::FPR8RegClass.contains(DestReg) &&
1738  AArch64::FPR8RegClass.contains(SrcReg)) {
1739  if (Subtarget.hasNEON()) {
1740  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
1741  &AArch64::FPR128RegClass);
1742  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
1743  &AArch64::FPR128RegClass);
1744  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
1745  .addReg(SrcReg)
1746  .addReg(SrcReg, getKillRegState(KillSrc));
1747  } else {
1748  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
1749  &AArch64::FPR32RegClass);
1750  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
1751  &AArch64::FPR32RegClass);
1752  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
1753  .addReg(SrcReg, getKillRegState(KillSrc));
1754  }
1755  return;
1756  }
1757 
1758  // Copies between GPR64 and FPR64.
1759  if (AArch64::FPR64RegClass.contains(DestReg) &&
1760  AArch64::GPR64RegClass.contains(SrcReg)) {
1761  BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
1762  .addReg(SrcReg, getKillRegState(KillSrc));
1763  return;
1764  }
1765  if (AArch64::GPR64RegClass.contains(DestReg) &&
1766  AArch64::FPR64RegClass.contains(SrcReg)) {
1767  BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
1768  .addReg(SrcReg, getKillRegState(KillSrc));
1769  return;
1770  }
1771  // Copies between GPR32 and FPR32.
1772  if (AArch64::FPR32RegClass.contains(DestReg) &&
1773  AArch64::GPR32RegClass.contains(SrcReg)) {
1774  BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
1775  .addReg(SrcReg, getKillRegState(KillSrc));
1776  return;
1777  }
1778  if (AArch64::GPR32RegClass.contains(DestReg) &&
1779  AArch64::FPR32RegClass.contains(SrcReg)) {
1780  BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
1781  .addReg(SrcReg, getKillRegState(KillSrc));
1782  return;
1783  }
1784 
1785  if (DestReg == AArch64::NZCV) {
1786  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
1787  BuildMI(MBB, I, DL, get(AArch64::MSR))
1788  .addImm(AArch64SysReg::NZCV)
1789  .addReg(SrcReg, getKillRegState(KillSrc))
1790  .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
1791  return;
1792  }
1793 
1794  if (SrcReg == AArch64::NZCV) {
1795  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
1796  BuildMI(MBB, I, DL, get(AArch64::MRS))
1797  .addReg(DestReg)
1798  .addImm(AArch64SysReg::NZCV)
1799  .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
1800  return;
1801  }
1802 
1803  llvm_unreachable("unimplemented reg-to-reg copy");
1804 }
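// Illustrative sketch (assumed example, not in the original listing): for
// an FPR64 copy with DestReg = D0 and SrcReg = D1, the paths above emit
//   orr v0.16b, v1.16b, v1.16b   ; NEON: copy widened to the Q registers
//   fmov d0, d1                  ; no NEON: plain scalar FP move
// The ORR-with-itself form is the canonical vector register MOV alias.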
1805 
1806 void AArch64InstrInfo::storeRegToStackSlot(
1807  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
1808  bool isKill, int FI, const TargetRegisterClass *RC,
1809  const TargetRegisterInfo *TRI) const {
1810  DebugLoc DL;
1811  if (MBBI != MBB.end())
1812  DL = MBBI->getDebugLoc();
1813  MachineFunction &MF = *MBB.getParent();
1814  MachineFrameInfo &MFI = *MF.getFrameInfo();
1815  unsigned Align = MFI.getObjectAlignment(FI);
1816 
1817  MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
1818  MachineMemOperand *MMO = MF.getMachineMemOperand(
1819  PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
1820  unsigned Opc = 0;
1821  bool Offset = true;
1822  switch (RC->getSize()) {
1823  case 1:
1824  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
1825  Opc = AArch64::STRBui;
1826  break;
1827  case 2:
1828  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
1829  Opc = AArch64::STRHui;
1830  break;
1831  case 4:
1832  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
1833  Opc = AArch64::STRWui;
1834  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
1835  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
1836  else
1837  assert(SrcReg != AArch64::WSP);
1838  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
1839  Opc = AArch64::STRSui;
1840  break;
1841  case 8:
1842  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
1843  Opc = AArch64::STRXui;
1844  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
1845  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
1846  else
1847  assert(SrcReg != AArch64::SP);
1848  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
1849  Opc = AArch64::STRDui;
1850  break;
1851  case 16:
1852  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
1853  Opc = AArch64::STRQui;
1854  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
1855  assert(Subtarget.hasNEON() &&
1856  "Unexpected register store without NEON");
1857  Opc = AArch64::ST1Twov1d, Offset = false;
1858  }
1859  break;
1860  case 24:
1861  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
1862  assert(Subtarget.hasNEON() &&
1863  "Unexpected register store without NEON");
1864  Opc = AArch64::ST1Threev1d, Offset = false;
1865  }
1866  break;
1867  case 32:
1868  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
1869  assert(Subtarget.hasNEON() &&
1870  "Unexpected register store without NEON");
1871  Opc = AArch64::ST1Fourv1d, Offset = false;
1872  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
1873  assert(Subtarget.hasNEON() &&
1874  "Unexpected register store without NEON");
1875  Opc = AArch64::ST1Twov2d, Offset = false;
1876  }
1877  break;
1878  case 48:
1879  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
1880  assert(Subtarget.hasNEON() &&
1881  "Unexpected register store without NEON");
1882  Opc = AArch64::ST1Threev2d, Offset = false;
1883  }
1884  break;
1885  case 64:
1886  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
1887  assert(Subtarget.hasNEON() &&
1888  "Unexpected register store without NEON");
1889  Opc = AArch64::ST1Fourv2d, Offset = false;
1890  }
1891  break;
1892  }
1893  assert(Opc && "Unknown register class");
1894 
1895  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
1896  .addReg(SrcReg, getKillRegState(isKill))
1897  .addFrameIndex(FI);
1898 
1899  if (Offset)
1900  MI.addImm(0);
1901  MI.addMemOperand(MMO);
1902 }
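// Example (illustrative): spilling an 8-byte GPR64 value selects STRXui
// above and builds
//   STRXui <srcreg>, <fi#N>, 0     ; Offset == true appends the "0"
// while a 32-byte QQ tuple has no immediate-offset store and builds
//   ST1Twov2d <srcreg>, <fi#N>     ; Offset == false, no immediate
// The frame index is rewritten to an SP-relative address later, in PEI.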
1903 
1904 void AArch64InstrInfo::loadRegFromStackSlot(
1905  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
1906  int FI, const TargetRegisterClass *RC,
1907  const TargetRegisterInfo *TRI) const {
1908  DebugLoc DL;
1909  if (MBBI != MBB.end())
1910  DL = MBBI->getDebugLoc();
1911  MachineFunction &MF = *MBB.getParent();
1912  MachineFrameInfo &MFI = *MF.getFrameInfo();
1913  unsigned Align = MFI.getObjectAlignment(FI);
1914  MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
1915  MachineMemOperand *MMO = MF.getMachineMemOperand(
1916  PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
1917 
1918  unsigned Opc = 0;
1919  bool Offset = true;
1920  switch (RC->getSize()) {
1921  case 1:
1922  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
1923  Opc = AArch64::LDRBui;
1924  break;
1925  case 2:
1926  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
1927  Opc = AArch64::LDRHui;
1928  break;
1929  case 4:
1930  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
1931  Opc = AArch64::LDRWui;
1932  if (TargetRegisterInfo::isVirtualRegister(DestReg))
1933  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
1934  else
1935  assert(DestReg != AArch64::WSP);
1936  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
1937  Opc = AArch64::LDRSui;
1938  break;
1939  case 8:
1940  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
1941  Opc = AArch64::LDRXui;
1942  if (TargetRegisterInfo::isVirtualRegister(DestReg))
1943  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
1944  else
1945  assert(DestReg != AArch64::SP);
1946  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
1947  Opc = AArch64::LDRDui;
1948  break;
1949  case 16:
1950  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
1951  Opc = AArch64::LDRQui;
1952  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
1953  assert(Subtarget.hasNEON() &&
1954  "Unexpected register load without NEON");
1955  Opc = AArch64::LD1Twov1d, Offset = false;
1956  }
1957  break;
1958  case 24:
1959  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
1960  assert(Subtarget.hasNEON() &&
1961  "Unexpected register load without NEON");
1962  Opc = AArch64::LD1Threev1d, Offset = false;
1963  }
1964  break;
1965  case 32:
1966  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
1967  assert(Subtarget.hasNEON() &&
1968  "Unexpected register load without NEON");
1969  Opc = AArch64::LD1Fourv1d, Offset = false;
1970  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
1971  assert(Subtarget.hasNEON() &&
1972  "Unexpected register load without NEON");
1973  Opc = AArch64::LD1Twov2d, Offset = false;
1974  }
1975  break;
1976  case 48:
1977  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
1978  assert(Subtarget.hasNEON() &&
1979  "Unexpected register load without NEON");
1980  Opc = AArch64::LD1Threev2d, Offset = false;
1981  }
1982  break;
1983  case 64:
1984  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
1985  assert(Subtarget.hasNEON() &&
1986  "Unexpected register load without NEON");
1987  Opc = AArch64::LD1Fourv2d, Offset = false;
1988  }
1989  break;
1990  }
1991  assert(Opc && "Unknown register class");
1992 
1993  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
1994  .addReg(DestReg, getDefRegState(true))
1995  .addFrameIndex(FI);
1996  if (Offset)
1997  MI.addImm(0);
1998  MI.addMemOperand(MMO);
1999 }
2000 
2001 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2002  MachineBasicBlock::iterator MBBI, DebugLoc DL,
2003  unsigned DestReg, unsigned SrcReg, int Offset,
2004  const TargetInstrInfo *TII,
2005  MachineInstr::MIFlag Flag, bool SetNZCV) {
2006  if (DestReg == SrcReg && Offset == 0)
2007  return;
2008 
2009  bool isSub = Offset < 0;
2010  if (isSub)
2011  Offset = -Offset;
2012 
2013  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2014  // scratch register. If DestReg is a virtual register, use it as the
2015  // scratch register; otherwise, create a new virtual register (to be
2016  // replaced by the scavenger at the end of PEI). That case can be optimized
2017  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2018  // register can be loaded with offset%8 and the add/sub can use an extending
2019  // instruction with LSL#3.
2020  // Currently the function handles any offsets but generates a poor sequence
2021  // of code.
2022  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2023 
2024  unsigned Opc;
2025  if (SetNZCV)
2026  Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2027  else
2028  Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2029  const unsigned MaxEncoding = 0xfff;
2030  const unsigned ShiftSize = 12;
2031  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2032  while (((unsigned)Offset) >= (1 << ShiftSize)) {
2033  unsigned ThisVal;
2034  if (((unsigned)Offset) > MaxEncodableValue) {
2035  ThisVal = MaxEncodableValue;
2036  } else {
2037  ThisVal = Offset & MaxEncodableValue;
2038  }
2039  assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2040  "Encoding cannot handle value that big");
2041  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2042  .addReg(SrcReg)
2043  .addImm(ThisVal >> ShiftSize)
2044  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2045  .setMIFlag(Flag);
2046 
2047  SrcReg = DestReg;
2048  Offset -= ThisVal;
2049  if (Offset == 0)
2050  return;
2051  }
2052  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2053  .addReg(SrcReg)
2054  .addImm(Offset)
2055  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2056  .setMIFlag(Flag);
2057 }
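// Worked example (illustrative): emitFrameOffset with DestReg = X0,
// SrcReg = SP and Offset = 0x101008 takes one loop iteration,
//   add x0, sp, #0x101, lsl #12    ; adds the 0x101000 chunk
// leaving Offset = 8, and the trailing BuildMI then emits
//   add x0, x0, #8                 ; shift amount 0
// ADDXri/SUBXri immediates are 12 bits wide, optionally shifted left by
// 12; larger offsets just take extra 0xfff000-sized loop iterations.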
2058 
2059 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2060  MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
2061  MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
2062  // This is a bit of a hack. Consider this instruction:
2063  //
2064  // %vreg0<def> = COPY %SP; GPR64all:%vreg0
2065  //
2066  // We explicitly chose GPR64all for the virtual register so such a copy might
2067  // be eliminated by RegisterCoalescer. However, that may not be possible, and
2068  // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
2069  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2070  //
2071  // To prevent that, we are going to constrain the %vreg0 register class here.
2072  //
2073  // <rdar://problem/11522048>
2074  //
2075  if (MI->isCopy()) {
2076  unsigned DstReg = MI->getOperand(0).getReg();
2077  unsigned SrcReg = MI->getOperand(1).getReg();
2078  if (SrcReg == AArch64::SP &&
2079  TargetRegisterInfo::isVirtualRegister(DstReg)) {
2080  MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2081  return nullptr;
2082  }
2083  if (DstReg == AArch64::SP &&
2084  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2085  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2086  return nullptr;
2087  }
2088  }
2089 
2090  // Cannot fold.
2091  return nullptr;
2092 }
2093 
2094 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2095  bool *OutUseUnscaledOp,
2096  unsigned *OutUnscaledOp,
2097  int *EmittableOffset) {
2098  int Scale = 1;
2099  bool IsSigned = false;
2100  // The ImmIdx should be changed case by case if it is not 2.
2101  unsigned ImmIdx = 2;
2102  unsigned UnscaledOp = 0;
2103  // Set output values in case of early exit.
2104  if (EmittableOffset)
2105  *EmittableOffset = 0;
2106  if (OutUseUnscaledOp)
2107  *OutUseUnscaledOp = false;
2108  if (OutUnscaledOp)
2109  *OutUnscaledOp = 0;
2110  switch (MI.getOpcode()) {
2111  default:
2112  llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
2113  // Vector spills/fills can't take an immediate offset.
2114  case AArch64::LD1Twov2d:
2115  case AArch64::LD1Threev2d:
2116  case AArch64::LD1Fourv2d:
2117  case AArch64::LD1Twov1d:
2118  case AArch64::LD1Threev1d:
2119  case AArch64::LD1Fourv1d:
2120  case AArch64::ST1Twov2d:
2121  case AArch64::ST1Threev2d:
2122  case AArch64::ST1Fourv2d:
2123  case AArch64::ST1Twov1d:
2124  case AArch64::ST1Threev1d:
2125  case AArch64::ST1Fourv1d:
2126  return AArch64FrameOffsetCannotUpdate;
2127  case AArch64::PRFMui:
2128  Scale = 8;
2129  UnscaledOp = AArch64::PRFUMi;
2130  break;
2131  case AArch64::LDRXui:
2132  Scale = 8;
2133  UnscaledOp = AArch64::LDURXi;
2134  break;
2135  case AArch64::LDRWui:
2136  Scale = 4;
2137  UnscaledOp = AArch64::LDURWi;
2138  break;
2139  case AArch64::LDRBui:
2140  Scale = 1;
2141  UnscaledOp = AArch64::LDURBi;
2142  break;
2143  case AArch64::LDRHui:
2144  Scale = 2;
2145  UnscaledOp = AArch64::LDURHi;
2146  break;
2147  case AArch64::LDRSui:
2148  Scale = 4;
2149  UnscaledOp = AArch64::LDURSi;
2150  break;
2151  case AArch64::LDRDui:
2152  Scale = 8;
2153  UnscaledOp = AArch64::LDURDi;
2154  break;
2155  case AArch64::LDRQui:
2156  Scale = 16;
2157  UnscaledOp = AArch64::LDURQi;
2158  break;
2159  case AArch64::LDRBBui:
2160  Scale = 1;
2161  UnscaledOp = AArch64::LDURBBi;
2162  break;
2163  case AArch64::LDRHHui:
2164  Scale = 2;
2165  UnscaledOp = AArch64::LDURHHi;
2166  break;
2167  case AArch64::LDRSBXui:
2168  Scale = 1;
2169  UnscaledOp = AArch64::LDURSBXi;
2170  break;
2171  case AArch64::LDRSBWui:
2172  Scale = 1;
2173  UnscaledOp = AArch64::LDURSBWi;
2174  break;
2175  case AArch64::LDRSHXui:
2176  Scale = 2;
2177  UnscaledOp = AArch64::LDURSHXi;
2178  break;
2179  case AArch64::LDRSHWui:
2180  Scale = 2;
2181  UnscaledOp = AArch64::LDURSHWi;
2182  break;
2183  case AArch64::LDRSWui:
2184  Scale = 4;
2185  UnscaledOp = AArch64::LDURSWi;
2186  break;
2187 
2188  case AArch64::STRXui:
2189  Scale = 8;
2190  UnscaledOp = AArch64::STURXi;
2191  break;
2192  case AArch64::STRWui:
2193  Scale = 4;
2194  UnscaledOp = AArch64::STURWi;
2195  break;
2196  case AArch64::STRBui:
2197  Scale = 1;
2198  UnscaledOp = AArch64::STURBi;
2199  break;
2200  case AArch64::STRHui:
2201  Scale = 2;
2202  UnscaledOp = AArch64::STURHi;
2203  break;
2204  case AArch64::STRSui:
2205  Scale = 4;
2206  UnscaledOp = AArch64::STURSi;
2207  break;
2208  case AArch64::STRDui:
2209  Scale = 8;
2210  UnscaledOp = AArch64::STURDi;
2211  break;
2212  case AArch64::STRQui:
2213  Scale = 16;
2214  UnscaledOp = AArch64::STURQi;
2215  break;
2216  case AArch64::STRBBui:
2217  Scale = 1;
2218  UnscaledOp = AArch64::STURBBi;
2219  break;
2220  case AArch64::STRHHui:
2221  Scale = 2;
2222  UnscaledOp = AArch64::STURHHi;
2223  break;
2224 
2225  case AArch64::LDPXi:
2226  case AArch64::LDPDi:
2227  case AArch64::STPXi:
2228  case AArch64::STPDi:
2229  IsSigned = true;
2230  Scale = 8;
2231  break;
2232  case AArch64::LDPQi:
2233  case AArch64::STPQi:
2234  IsSigned = true;
2235  Scale = 16;
2236  break;
2237  case AArch64::LDPWi:
2238  case AArch64::LDPSi:
2239  case AArch64::STPWi:
2240  case AArch64::STPSi:
2241  IsSigned = true;
2242  Scale = 4;
2243  break;
2244 
2245  case AArch64::LDURXi:
2246  case AArch64::LDURWi:
2247  case AArch64::LDURBi:
2248  case AArch64::LDURHi:
2249  case AArch64::LDURSi:
2250  case AArch64::LDURDi:
2251  case AArch64::LDURQi:
2252  case AArch64::LDURHHi:
2253  case AArch64::LDURBBi:
2254  case AArch64::LDURSBXi:
2255  case AArch64::LDURSBWi:
2256  case AArch64::LDURSHXi:
2257  case AArch64::LDURSHWi:
2258  case AArch64::LDURSWi:
2259  case AArch64::STURXi:
2260  case AArch64::STURWi:
2261  case AArch64::STURBi:
2262  case AArch64::STURHi:
2263  case AArch64::STURSi:
2264  case AArch64::STURDi:
2265  case AArch64::STURQi:
2266  case AArch64::STURBBi:
2267  case AArch64::STURHHi:
2268  Scale = 1;
2269  break;
2270  }
2271 
2272  Offset += MI.getOperand(ImmIdx).getImm() * Scale;
2273 
2274  bool useUnscaledOp = false;
2275  // If the offset doesn't match the scale, we rewrite the instruction to
2276  // use the unscaled instruction instead. Likewise, if we have a negative
2277  // offset (and have an unscaled op to use).
2278  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
2279  useUnscaledOp = true;
2280 
2281  // Use an unscaled addressing mode if the instruction has a negative offset
2282  // (or if the instruction is already using an unscaled addressing mode).
2283  unsigned MaskBits;
2284  if (IsSigned) {
2285  // ldp/stp instructions.
2286  MaskBits = 7;
2287  Offset /= Scale;
2288  } else if (UnscaledOp == 0 || useUnscaledOp) {
2289  MaskBits = 9;
2290  IsSigned = true;
2291  Scale = 1;
2292  } else {
2293  MaskBits = 12;
2294  IsSigned = false;
2295  Offset /= Scale;
2296  }
2297 
2298  // Attempt to fold address computation.
2299  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
2300  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
2301  if (Offset >= MinOff && Offset <= MaxOff) {
2302  if (EmittableOffset)
2303  *EmittableOffset = Offset;
2304  Offset = 0;
2305  } else {
2306  int NewOff = Offset < 0 ? MinOff : MaxOff;
2307  if (EmittableOffset)
2308  *EmittableOffset = NewOff;
2309  Offset = (Offset - NewOff) * Scale;
2310  }
2311  if (OutUseUnscaledOp)
2312  *OutUseUnscaledOp = useUnscaledOp;
2313  if (OutUnscaledOp)
2314  *OutUnscaledOp = UnscaledOp;
2315  return AArch64FrameOffsetCanUpdate |
2316  (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
2317 }
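// Illustrative cases, assuming an LDRXui (Scale = 8, UnscaledOp = LDURXi):
// a byte offset of 24 stays scaled and legal (encoded immediate 24 / 8 = 3,
// unsigned range 0..4095), while 17 or -8 cannot be scaled, so the routine
// switches to LDURXi, whose signed 9-bit immediate covers -256..255. An
// offset outside both ranges is split: *EmittableOffset receives the
// closest legal value and the remainder is handed back through Offset.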
2318 
2319 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2320  unsigned FrameReg, int &Offset,
2321  const AArch64InstrInfo *TII) {
2322  unsigned Opcode = MI.getOpcode();
2323  unsigned ImmIdx = FrameRegIdx + 1;
2324 
2325  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
2326  Offset += MI.getOperand(ImmIdx).getImm();
2327  emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
2328  MI.getOperand(0).getReg(), FrameReg, Offset, TII,
2329  MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
2330  MI.eraseFromParent();
2331  Offset = 0;
2332  return true;
2333  }
2334 
2335  int NewOffset;
2336  unsigned UnscaledOp;
2337  bool UseUnscaledOp;
2338  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
2339  &UnscaledOp, &NewOffset);
2340  if (Status & AArch64FrameOffsetCanUpdate) {
2341  if (Status & AArch64FrameOffsetIsLegal)
2342  // Replace the FrameIndex with FrameReg.
2343  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2344  if (UseUnscaledOp)
2345  MI.setDesc(TII->get(UnscaledOp));
2346 
2347  MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
2348  return Offset == 0;
2349  }
2350 
2351  return false;
2352 }
2353 
2354 void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
2355  NopInst.setOpcode(AArch64::HINT);
2356  NopInst.addOperand(MCOperand::createImm(0));
2357 }
2358 /// useMachineCombiner - return true when a target supports MachineCombiner
2359 bool AArch64InstrInfo::useMachineCombiner() const {
2360  // AArch64 supports the combiner
2361  return true;
2362 }
2363 //
2364 // True when Opc sets the condition flags (NZCV)
2365 static bool isCombineInstrSettingFlag(unsigned Opc) {
2366  switch (Opc) {
2367  case AArch64::ADDSWrr:
2368  case AArch64::ADDSWri:
2369  case AArch64::ADDSXrr:
2370  case AArch64::ADDSXri:
2371  case AArch64::SUBSWrr:
2372  case AArch64::SUBSXrr:
2373  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2374  case AArch64::SUBSWri:
2375  case AArch64::SUBSXri:
2376  return true;
2377  default:
2378  break;
2379  }
2380  return false;
2381 }
2382 //
2383 // 32b Opcodes that can be combined with a MUL
2384 static bool isCombineInstrCandidate32(unsigned Opc) {
2385  switch (Opc) {
2386  case AArch64::ADDWrr:
2387  case AArch64::ADDWri:
2388  case AArch64::SUBWrr:
2389  case AArch64::ADDSWrr:
2390  case AArch64::ADDSWri:
2391  case AArch64::SUBSWrr:
2392  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2393  case AArch64::SUBWri:
2394  case AArch64::SUBSWri:
2395  return true;
2396  default:
2397  break;
2398  }
2399  return false;
2400 }
2401 //
2402 // 64b Opcodes that can be combined with a MUL
2403 static bool isCombineInstrCandidate64(unsigned Opc) {
2404  switch (Opc) {
2405  case AArch64::ADDXrr:
2406  case AArch64::ADDXri:
2407  case AArch64::SUBXrr:
2408  case AArch64::ADDSXrr:
2409  case AArch64::ADDSXri:
2410  case AArch64::SUBSXrr:
2411  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2412  case AArch64::SUBXri:
2413  case AArch64::SUBSXri:
2414  return true;
2415  default:
2416  break;
2417  }
2418  return false;
2419 }
2420 //
2421 // Opcodes that can be combined with a MUL
2422 static bool isCombineInstrCandidate(unsigned Opc) {
2423  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
2424 }
2425 
2426 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
2427  unsigned MulOpc, unsigned ZeroReg) {
2428  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2429  MachineInstr *MI = nullptr;
2430  // We need a virtual register definition.
2431  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2432  MI = MRI.getUniqueVRegDef(MO.getReg());
2433  // And it needs to be in the trace (otherwise, it won't have a depth).
2434  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
2435  return false;
2436 
2437  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
2438  MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
2439  MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
2440 
2441  // The third input reg must be zero.
2442  if (MI->getOperand(3).getReg() != ZeroReg)
2443  return false;
2444 
2445  // Must only be used by the user we combine with.
2446  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2447  return false;
2448 
2449  return true;
2450 }
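// Background: AArch64 has no standalone integer multiply instruction;
// "mul" is an alias for MADD with the zero register as accumulator, e.g.
//   mul w8, w0, w1   ==   madd w8, w0, w1, wzr
// so the ZeroReg check on operand 3 above identifies a plain multiply
// whose accumulator slot is free to take the ADD/SUB operand instead.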
2451 
2452 /// Return true when there is potentially a faster code sequence
2453 /// for an instruction chain ending in \p Root. All potential patterns
2454 /// are listed in the \p Pattern vector. Pattern should be sorted in
2455 /// priority order since the pattern evaluator stops checking as soon as
2456 /// it finds a faster sequence.
2457 
2458 bool AArch64InstrInfo::getMachineCombinerPatterns(
2459  MachineInstr &Root,
2460  SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
2461  unsigned Opc = Root.getOpcode();
2462  MachineBasicBlock &MBB = *Root.getParent();
2463  bool Found = false;
2464 
2465  if (!isCombineInstrCandidate(Opc))
2466  return 0;
2467  if (isCombineInstrSettingFlag(Opc)) {
2468  int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
2469  // When NZCV is live bail out.
2470  if (Cmp_NZCV == -1)
2471  return 0;
2472  unsigned NewOpc = convertFlagSettingOpcode(&Root);
2473  // When opcode can't change bail out.
2474  // CHECKME: do we miss any cases for opcode conversion?
2475  if (NewOpc == Opc)
2476  return 0;
2477  Opc = NewOpc;
2478  }
2479 
2480  switch (Opc) {
2481  default:
2482  break;
2483  case AArch64::ADDWrr:
2484  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
2485  "ADDWrr does not have register operands");
2486  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2487  AArch64::WZR)) {
2488  Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
2489  Found = true;
2490  }
2491  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
2492  AArch64::WZR)) {
2493  Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
2494  Found = true;
2495  }
2496  break;
2497  case AArch64::ADDXrr:
2498  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2499  AArch64::XZR)) {
2500  Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
2501  Found = true;
2502  }
2503  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
2504  AArch64::XZR)) {
2505  Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
2506  Found = true;
2507  }
2508  break;
2509  case AArch64::SUBWrr:
2510  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2511  AArch64::WZR)) {
2512  Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
2513  Found = true;
2514  }
2515  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
2516  AArch64::WZR)) {
2517  Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
2518  Found = true;
2519  }
2520  break;
2521  case AArch64::SUBXrr:
2522  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2523  AArch64::XZR)) {
2524  Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
2525  Found = true;
2526  }
2527  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
2528  AArch64::XZR)) {
2529  Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
2530  Found = true;
2531  }
2532  break;
2533  case AArch64::ADDWri:
2534  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2535  AArch64::WZR)) {
2536  Patterns.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
2537  Found = true;
2538  }
2539  break;
2540  case AArch64::ADDXri:
2541  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2542  AArch64::XZR)) {
2543  Patterns.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
2544  Found = true;
2545  }
2546  break;
2547  case AArch64::SUBWri:
2548  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2549  AArch64::WZR)) {
2550  Patterns.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
2551  Found = true;
2552  }
2553  break;
2554  case AArch64::SUBXri:
2555  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2556  AArch64::XZR)) {
2557  Patterns.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
2558  Found = true;
2559  }
2560  break;
2561  }
2562  return Found;
2563 }
2564 
2565 /// genMadd - Generate madd instruction and combine mul and add.
2566 /// Example:
2567 /// MUL I=A,B,0
2568 /// ADD R,I,C
2569 /// ==> MADD R,A,B,C
2570 /// \param Root is the ADD instruction
2571 /// \param [out] InsInstrs is a vector of machine instructions and will
2572 /// contain the generated madd instruction
2573 /// \param IdxMulOpd is index of operand in Root that is the result of
2574 /// the MUL. In the example above IdxMulOpd is 1.
2575 /// \param MaddOpc the opcode of the madd instruction
2576 static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
2577  const TargetInstrInfo *TII, MachineInstr &Root,
2578  SmallVectorImpl<MachineInstr *> &InsInstrs,
2579  unsigned IdxMulOpd, unsigned MaddOpc,
2580  const TargetRegisterClass *RC) {
2581  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
2582 
2583  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
2584  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
2585  unsigned ResultReg = Root.getOperand(0).getReg();
2586  unsigned SrcReg0 = MUL->getOperand(1).getReg();
2587  bool Src0IsKill = MUL->getOperand(1).isKill();
2588  unsigned SrcReg1 = MUL->getOperand(2).getReg();
2589  bool Src1IsKill = MUL->getOperand(2).isKill();
2590  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
2591  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
2592 
2593  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
2594  MRI.constrainRegClass(ResultReg, RC);
2595  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
2596  MRI.constrainRegClass(SrcReg0, RC);
2597  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
2598  MRI.constrainRegClass(SrcReg1, RC);
2599  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
2600  MRI.constrainRegClass(SrcReg2, RC);
2601 
2602  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
2603  ResultReg)
2604  .addReg(SrcReg0, getKillRegState(Src0IsKill))
2605  .addReg(SrcReg1, getKillRegState(Src1IsKill))
2606  .addReg(SrcReg2, getKillRegState(Src2IsKill));
2607  // Insert the MADD
2608  InsInstrs.push_back(MIB);
2609  return MUL;
2610 }
2611 
2612 /// genMaddR - Generate madd instruction and combine mul and add using
2613 /// an extra virtual register
2614 /// Example - an ADD intermediate needs to be stored in a register:
2615 /// MUL I=A,B,0
2616 /// ADD R,I,Imm
2617 /// ==> ORR V, ZR, Imm
2618 /// ==> MADD R,A,B,V
2619 /// \param Root is the ADD instruction
2620 /// \param [out] InsInstrs is a vector of machine instructions and will
2621 /// contain the generated madd instruction
2622 /// \param IdxMulOpd is index of operand in Root that is the result of
2623 /// the MUL. In the example above IdxMulOpd is 1.
2624 /// \param MaddOpc the opcode of the madd instruction
2625 /// \param VR is a virtual register that holds the value of an ADD operand
2626 /// (V in the example above).
2627 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
2628  const TargetInstrInfo *TII, MachineInstr &Root,
2629  SmallVectorImpl<MachineInstr *> &InsInstrs,
2630  unsigned IdxMulOpd, unsigned MaddOpc,
2631  unsigned VR, const TargetRegisterClass *RC) {
2632  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
2633 
2634  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
2635  unsigned ResultReg = Root.getOperand(0).getReg();
2636  unsigned SrcReg0 = MUL->getOperand(1).getReg();
2637  bool Src0IsKill = MUL->getOperand(1).isKill();
2638  unsigned SrcReg1 = MUL->getOperand(2).getReg();
2639  bool Src1IsKill = MUL->getOperand(2).isKill();
2640 
2641  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
2642  MRI.constrainRegClass(ResultReg, RC);
2643  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
2644  MRI.constrainRegClass(SrcReg0, RC);
2645  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
2646  MRI.constrainRegClass(SrcReg1, RC);
2647  if (TargetRegisterInfo::isVirtualRegister(VR))
2648  MRI.constrainRegClass(VR, RC);
2649 
2650  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
2651  ResultReg)
2652  .addReg(SrcReg0, getKillRegState(Src0IsKill))
2653  .addReg(SrcReg1, getKillRegState(Src1IsKill))
2654  .addReg(VR);
2655  // Insert the MADD
2656  InsInstrs.push_back(MIB);
2657  return MUL;
2658 }
2659 
2660 /// When getMachineCombinerPatterns() finds potential patterns,
2661 /// this function generates the instructions that could replace the
2662 /// original code sequence
2663 void AArch64InstrInfo::genAlternativeCodeSequence(
2664  MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
2665  SmallVectorImpl<MachineInstr *> &InsInstrs,
2666  SmallVectorImpl<MachineInstr *> &DelInstrs,
2667  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
2668  MachineBasicBlock &MBB = *Root.getParent();
2669  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2670  MachineFunction &MF = *MBB.getParent();
2671  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
2672 
2673  MachineInstr *MUL;
2674  const TargetRegisterClass *RC;
2675  unsigned Opc;
2676  switch (Pattern) {
2677  default:
2678  // signal error.
2679  break;
2680  case MachineCombinerPattern::MC_MULADDW_OP1:
2681  case MachineCombinerPattern::MC_MULADDX_OP1:
2682  // MUL I=A,B,0
2683  // ADD R,I,C
2684  // ==> MADD R,A,B,C
2685  // --- Create(MADD);
2686  if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) {
2687  Opc = AArch64::MADDWrrr;
2688  RC = &AArch64::GPR32RegClass;
2689  } else {
2690  Opc = AArch64::MADDXrrr;
2691  RC = &AArch64::GPR64RegClass;
2692  }
2693  MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
2694  break;
2695  case MachineCombinerPattern::MC_MULADDW_OP2:
2696  case MachineCombinerPattern::MC_MULADDX_OP2:
2697  // MUL I=A,B,0
2698  // ADD R,C,I
2699  // ==> MADD R,A,B,C
2700  // --- Create(MADD);
2701  if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) {
2702  Opc = AArch64::MADDWrrr;
2703  RC = &AArch64::GPR32RegClass;
2704  } else {
2705  Opc = AArch64::MADDXrrr;
2706  RC = &AArch64::GPR64RegClass;
2707  }
2708  MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
2709  break;
2710  case MachineCombinerPattern::MC_MULADDWI_OP1:
2711  case MachineCombinerPattern::MC_MULADDXI_OP1: {
2712  // MUL I=A,B,0
2713  // ADD R,I,Imm
2714  // ==> ORR V, ZR, Imm
2715  // ==> MADD R,A,B,V
2716  // --- Create(MADD);
2717  const TargetRegisterClass *OrrRC;
2718  unsigned BitSize, OrrOpc, ZeroReg;
2719  if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) {
2720  OrrOpc = AArch64::ORRWri;
2721  OrrRC = &AArch64::GPR32spRegClass;
2722  BitSize = 32;
2723  ZeroReg = AArch64::WZR;
2724  Opc = AArch64::MADDWrrr;
2725  RC = &AArch64::GPR32RegClass;
2726  } else {
2727  OrrOpc = AArch64::ORRXri;
2728  OrrRC = &AArch64::GPR64spRegClass;
2729  BitSize = 64;
2730  ZeroReg = AArch64::XZR;
2731  Opc = AArch64::MADDXrrr;
2732  RC = &AArch64::GPR64RegClass;
2733  }
2734  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
2735  uint64_t Imm = Root.getOperand(2).getImm();
2736 
2737  if (Root.getOperand(3).isImm()) {
2738  unsigned Val = Root.getOperand(3).getImm();
2739  Imm = Imm << Val;
2740  }
2741  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
2742  uint64_t Encoding;
2743  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
2744  MachineInstrBuilder MIB1 =
2745  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
2746  .addReg(ZeroReg)
2747  .addImm(Encoding);
2748  InsInstrs.push_back(MIB1);
2749  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
2750  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
2751  }
2752  break;
2753  }
2754  case MachineCombinerPattern::MC_MULSUBW_OP1:
2755  case MachineCombinerPattern::MC_MULSUBX_OP1: {
2756  // MUL I=A,B,0
2757  // SUB R,I, C
2758  // ==> SUB V, 0, C
2759  // ==> MADD R,A,B,V // = -C + A*B
2760  // --- Create(MADD);
2761  const TargetRegisterClass *SubRC;
2762  unsigned SubOpc, ZeroReg;
2763  if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
2764  SubOpc = AArch64::SUBWrr;
2765  SubRC = &AArch64::GPR32spRegClass;
2766  ZeroReg = AArch64::WZR;
2767  Opc = AArch64::MADDWrrr;
2768  RC = &AArch64::GPR32RegClass;
2769  } else {
2770  SubOpc = AArch64::SUBXrr;
2771  SubRC = &AArch64::GPR64spRegClass;
2772  ZeroReg = AArch64::XZR;
2773  Opc = AArch64::MADDXrrr;
2774  RC = &AArch64::GPR64RegClass;
2775  }
2776  unsigned NewVR = MRI.createVirtualRegister(SubRC);
2777  // SUB NewVR, 0, C
2778  MachineInstrBuilder MIB1 =
2779  BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
2780  .addReg(ZeroReg)
2781  .addOperand(Root.getOperand(2));
2782  InsInstrs.push_back(MIB1);
2783  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
2784  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
2785  break;
2786  }
2787  case MachineCombinerPattern::MC_MULSUBW_OP2:
2788  case MachineCombinerPattern::MC_MULSUBX_OP2:
2789  // MUL I=A,B,0
2790  // SUB R,C,I
2791  // ==> MSUB R,A,B,C (computes C - A*B)
2792  // --- Create(MSUB);
2793  if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) {
2794  Opc = AArch64::MSUBWrrr;
2795  RC = &AArch64::GPR32RegClass;
2796  } else {
2797  Opc = AArch64::MSUBXrrr;
2798  RC = &AArch64::GPR64RegClass;
2799  }
2800  MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
2801  break;
2802  case MachineCombinerPattern::MC_MULSUBWI_OP1:
2803  case MachineCombinerPattern::MC_MULSUBXI_OP1: {
2804  // MUL I=A,B,0
2805  // SUB R,I, Imm
2806  // ==> ORR V, ZR, -Imm
2807  // ==> MADD R,A,B,V // = -Imm + A*B
2808  // --- Create(MADD);
2809  const TargetRegisterClass *OrrRC;
2810  unsigned BitSize, OrrOpc, ZeroReg;
2811  if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
2812  OrrOpc = AArch64::ORRWri;
2813  OrrRC = &AArch64::GPR32spRegClass;
2814  BitSize = 32;
2815  ZeroReg = AArch64::WZR;
2816  Opc = AArch64::MADDWrrr;
2817  RC = &AArch64::GPR32RegClass;
2818  } else {
2819  OrrOpc = AArch64::ORRXri;
2820  OrrRC = &AArch64::GPR64spRegClass;
2821  BitSize = 64;
2822  ZeroReg = AArch64::XZR;
2823  Opc = AArch64::MADDXrrr;
2824  RC = &AArch64::GPR64RegClass;
2825  }
2826  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
2827  int Imm = Root.getOperand(2).getImm();
2828  if (Root.getOperand(3).isImm()) {
2829  unsigned Val = Root.getOperand(3).getImm();
2830  Imm = Imm << Val;
2831  }
2832  uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
2833  uint64_t Encoding;
2834  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
2835  MachineInstrBuilder MIB1 =
2836  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
2837  .addReg(ZeroReg)
2838  .addImm(Encoding);
2839  InsInstrs.push_back(MIB1);
2840  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
2841  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
2842  }
2843  break;
2844  }
2845  } // end switch (Pattern)
2846  // Record MUL and ADD/SUB for deletion
2847  DelInstrs.push_back(MUL);
2848  DelInstrs.push_back(&Root);
2849 
2850  return;
2851 }
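// Concrete rewrite (illustrative) for MC_MULADDWI_OP1 with immediate 3,
// which is encodable as a logical immediate:
//   madd w8, w0, w1, wzr           ; the MUL feeding the add
//   add  w9, w8, #3
// becomes
//   orr  w10, wzr, #3              ; ORRWri materializes 3 into NewVR
//   madd w9, w0, w1, w10
// with the original MUL and ADD queued for deletion via DelInstrs.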
2852 
2853 /// \brief Replace csinc-branch sequence by simple conditional branch
2854 ///
2855 /// Examples:
2856 /// 1.
2857 /// csinc w9, wzr, wzr, <condition code>
2858 /// tbnz w9, #0, 0x44
2859 /// to
2860 /// b.<inverted condition code>
2861 ///
2862 /// 2.
2863 /// csinc w9, wzr, wzr, <condition code>
2864 /// tbz w9, #0, 0x44
2865 /// to
2866 /// b.<condition code>
2867 ///
2868 /// \param MI Conditional Branch
2869 /// \return True when the simple conditional branch is generated
2870 ///
2871 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
2872  bool IsNegativeBranch = false;
2873  bool IsTestAndBranch = false;
2874  unsigned TargetBBInMI = 0;
2875  switch (MI->getOpcode()) {
2876  default:
2877  llvm_unreachable("Unknown branch instruction?");
2878  case AArch64::Bcc:
2879  return false;
2880  case AArch64::CBZW:
2881  case AArch64::CBZX:
2882  TargetBBInMI = 1;
2883  break;
2884  case AArch64::CBNZW:
2885  case AArch64::CBNZX:
2886  TargetBBInMI = 1;
2887  IsNegativeBranch = true;
2888  break;
2889  case AArch64::TBZW:
2890  case AArch64::TBZX:
2891  TargetBBInMI = 2;
2892  IsTestAndBranch = true;
2893  break;
2894  case AArch64::TBNZW:
2895  case AArch64::TBNZX:
2896  TargetBBInMI = 2;
2897  IsNegativeBranch = true;
2898  IsTestAndBranch = true;
2899  break;
2900  }
2901  // So we increment a zero register and test for bits other
2902  // than bit 0? Conservatively bail out in case the verifier
2903  // missed this case.
2904  if (IsTestAndBranch && MI->getOperand(1).getImm())
2905  return false;
2906 
2907  // Find Definition.
2908  assert(MI->getParent() && "Incomplete machine instruciton\n");
2909  MachineBasicBlock *MBB = MI->getParent();
2910  MachineFunction *MF = MBB->getParent();
2911  MachineRegisterInfo *MRI = &MF->getRegInfo();
2912  unsigned VReg = MI->getOperand(0).getReg();
2913  if (!TargetRegisterInfo::isVirtualRegister(VReg))
2914  return false;
2915 
2916  MachineInstr *DefMI = MRI->getVRegDef(VReg);
2917 
2918  // Look for CSINC
2919  if (!(DefMI->getOpcode() == AArch64::CSINCWr &&
2920  DefMI->getOperand(1).getReg() == AArch64::WZR &&
2921  DefMI->getOperand(2).getReg() == AArch64::WZR) &&
2922  !(DefMI->getOpcode() == AArch64::CSINCXr &&
2923  DefMI->getOperand(1).getReg() == AArch64::XZR &&
2924  DefMI->getOperand(2).getReg() == AArch64::XZR))
2925  return false;
2926 
2927  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
2928  return false;
2929 
2930  AArch64CC::CondCode CC =
2931  (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
2932  bool CheckOnlyCCWrites = true;
2933  // Convert only when the condition code is not modified between
2934  // the CSINC and the branch. The CC may be used by other
2935  // instructions in between.
2936  if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo()))
2937  return false;
2938  MachineBasicBlock &RefToMBB = *MBB;
2939  MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB();
2940  DebugLoc DL = MI->getDebugLoc();
2941  if (IsNegativeBranch)
2942  CC = AArch64CC::getInvertedCondCode(CC);
2943  BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
2944  MI->eraseFromParent();
2945  return true;
2946 }