LLVM  3.7.0
ARMFrameLowering.cpp
1 //===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the ARM implementation of TargetFrameLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMFrameLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMConstantPoolValue.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "MCTargetDesc/ARMAddressingModes.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/MC/MCContext.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Target/TargetOptions.h"
31 
32 using namespace llvm;
33 
34 static cl::opt<bool>
35 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
36  cl::desc("Align ARM NEON spills in prolog and epilog"));
37 
38 static MachineBasicBlock::iterator
39 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
40  unsigned NumAlignedDPRCS2Regs);
41 
42 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
43  : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
44  STI(sti) {}
45 
46 bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
47  // iOS always has a FP for backtracking, force other targets to keep their FP
48  // when doing FastISel. The emitted code is currently superior, and in cases
49  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
50  return TargetFrameLowering::noFramePointerElim(MF) ||
51  MF.getSubtarget<ARMSubtarget>().useFastISel();
52 }
53 
54 /// hasFP - Return true if the specified function should have a dedicated frame
55 /// pointer register. This is true if the function has variable sized allocas
56 /// or if frame pointer elimination is disabled.
57 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
58  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
59 
60  // iOS requires FP not to be clobbered for backtracing purpose.
61  if (STI.isTargetIOS())
62  return true;
63 
64  const MachineFrameInfo *MFI = MF.getFrameInfo();
65  // Always eliminate non-leaf frame pointers.
66  return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
67  MFI->hasCalls()) ||
68  RegInfo->needsStackRealignment(MF) ||
69  MFI->hasVarSizedObjects() ||
70  MFI->isFrameAddressTaken());
71 }
72 
73 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
74 /// not required, we reserve argument space for call sites in the function
75 /// immediately on entry to the current function. This eliminates the need for
76 /// add/sub sp brackets around call sites. Returns true if the call frame is
77 /// included as part of the stack frame.
78 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
79  const MachineFrameInfo *FFI = MF.getFrameInfo();
80  unsigned CFSize = FFI->getMaxCallFrameSize();
81  // It's not always a good idea to include the call frame as part of the
82  // stack frame. ARM (especially Thumb) has a small immediate offset range for
83  // addressing the stack frame. So a large call frame can cause poor codegen
84  // and may even make it impossible to scavenge a register.
85  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
86  return false;
87 
88  return !MF.getFrameInfo()->hasVarSizedObjects();
89 }
90 
91 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
92 /// call frame pseudos can be simplified. Unlike most targets, having a FP
93 /// is not sufficient here since we still may reference some objects via SP
94 /// even when FP is available in Thumb2 mode.
95 bool
96 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
97  return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
98 }
99 
100 static bool isCSRestore(MachineInstr *MI,
101  const ARMBaseInstrInfo &TII,
102  const MCPhysReg *CSRegs) {
103  // Integer spill area is handled with "pop".
104  if (isPopOpcode(MI->getOpcode())) {
105  // The first two operands are predicates. The last two are
106  // imp-def and imp-use of SP. Check everything in between.
107  for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
108  if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
109  return false;
110  return true;
111  }
112  if ((MI->getOpcode() == ARM::LDR_POST_IMM ||
113  MI->getOpcode() == ARM::LDR_POST_REG ||
114  MI->getOpcode() == ARM::t2LDR_POST) &&
115  isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
116  MI->getOperand(1).getReg() == ARM::SP)
117  return true;
118 
119  return false;
120 }
121 
122 static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB,
123  MachineBasicBlock::iterator &MBBI, DebugLoc dl,
124  const ARMBaseInstrInfo &TII, unsigned DestReg,
125  unsigned SrcReg, int NumBytes,
126  unsigned MIFlags = MachineInstr::NoFlags,
127  ARMCC::CondCodes Pred = ARMCC::AL,
128  unsigned PredReg = 0) {
129  if (isARM)
130  emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
131  Pred, PredReg, TII, MIFlags);
132  else
133  emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
134  Pred, PredReg, TII, MIFlags);
135 }
136 
137 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
138  MachineBasicBlock::iterator &MBBI, DebugLoc dl,
139  const ARMBaseInstrInfo &TII, int NumBytes,
140  unsigned MIFlags = MachineInstr::NoFlags,
141  ARMCC::CondCodes Pred = ARMCC::AL,
142  unsigned PredReg = 0) {
143  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
144  MIFlags, Pred, PredReg);
145 }
146 
147 static int sizeOfSPAdjustment(const MachineInstr *MI) {
148  int RegSize;
149  switch (MI->getOpcode()) {
150  case ARM::VSTMDDB_UPD:
151  RegSize = 8;
152  break;
153  case ARM::STMDB_UPD:
154  case ARM::t2STMDB_UPD:
155  RegSize = 4;
156  break;
157  case ARM::t2STR_PRE:
158  case ARM::STR_PRE_IMM:
159  return 4;
160  default:
161  llvm_unreachable("Unknown push or pop like instruction");
162  }
163 
164  int count = 0;
165  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
166  // pred) so the list starts at 4.
167  for (int i = MI->getNumOperands() - 1; i >= 4; --i)
168  count += RegSize;
169  return count;
170 }
171 
172 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
173  size_t StackSizeInBytes) {
174  const MachineFrameInfo *MFI = MF.getFrameInfo();
175  const Function *F = MF.getFunction();
176  unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096;
177  if (F->hasFnAttribute("stack-probe-size"))
178  F->getFnAttribute("stack-probe-size")
179  .getValueAsString()
180  .getAsInteger(0, StackProbeSize);
181  return StackSizeInBytes >= StackProbeSize;
182 }
183 
184 namespace {
185 struct StackAdjustingInsts {
186  struct InstInfo {
187  MachineBasicBlock::iterator I;
188  unsigned SPAdjust;
189  bool BeforeFPSet;
190  };
191 
192  SmallVector<InstInfo, 4> Insts;
193 
194  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
195  bool BeforeFPSet = false) {
196  InstInfo Info = {I, SPAdjust, BeforeFPSet};
197  Insts.push_back(Info);
198  }
199 
200  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
201  auto Info = std::find_if(Insts.begin(), Insts.end(),
202  [&](InstInfo &Info) { return Info.I == I; });
203  assert(Info != Insts.end() && "invalid sp adjusting instruction");
204  Info->SPAdjust += ExtraBytes;
205  }
206 
207  void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
208  DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) {
209  unsigned CFAOffset = 0;
210  for (auto &Info : Insts) {
211  if (HasFP && !Info.BeforeFPSet)
212  return;
213 
214  CFAOffset -= Info.SPAdjust;
215  unsigned CFIIndex = MMI.addFrameInst(
216  MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
217  BuildMI(MBB, std::next(Info.I), dl,
218  TII.get(TargetOpcode::CFI_INSTRUCTION))
219  .addCFIIndex(CFIIndex)
220  .setMIFlags(MachineInstr::FrameSetup);
221  }
222  }
223 };
224 }
225 
226 /// Emit an instruction sequence that will align the address in
227 /// register Reg by zero-ing out the lower bits. For versions of the
228 /// architecture that support Neon, this must be done in a single
229 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
230 /// single instruction. That function only gets called when optimizing
231 /// spilling of D registers on a core with the Neon instruction set
232 /// present.
233 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
234  const TargetInstrInfo &TII,
235  MachineBasicBlock &MBB,
236  MachineBasicBlock::iterator MBBI,
237  DebugLoc DL, const unsigned Reg,
238  const unsigned Alignment,
239  const bool MustBeSingleInstruction) {
240  const ARMSubtarget &AST =
241  static_cast<const ARMSubtarget &>(MF.getSubtarget());
242  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
243  const unsigned AlignMask = Alignment - 1;
244  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
245  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
246  if (!AFI->isThumbFunction()) {
247  // if the BFC instruction is available, use that to zero the lower
248  // bits:
249  // bfc Reg, #0, log2(Alignment)
250  // otherwise use BIC, if the mask to zero the required number of bits
251  // can be encoded in the bic immediate field
252  // bic Reg, Reg, Alignment-1
253  // otherwise, emit
254  // lsr Reg, Reg, log2(Alignment)
255  // lsl Reg, Reg, log2(Alignment)
256  if (CanUseBFC) {
257  AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
258  .addReg(Reg, RegState::Kill)
259  .addImm(~AlignMask));
260  } else if (AlignMask <= 255) {
261  AddDefaultCC(
262  AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
263  .addReg(Reg, RegState::Kill)
264  .addImm(AlignMask)));
265  } else {
266  assert(!MustBeSingleInstruction &&
267  "Shouldn't call emitAligningInstructions demanding a single "
268  "instruction to be emitted for large stack alignment for a target "
269  "without BFC.");
270  AddDefaultCC(AddDefaultPred(
271  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
272  .addReg(Reg, RegState::Kill)
273  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
274  AddDefaultCC(AddDefaultPred(
275  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
276  .addReg(Reg, RegState::Kill)
277  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
278  }
279  } else {
280  // Since this is only reached for Thumb-2 targets, the BFC instruction
281  // should always be available.
282  assert(CanUseBFC);
283  AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
284  .addReg(Reg, RegState::Kill)
285  .addImm(~AlignMask));
286  }
287 }
288 
289 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
290  MachineBasicBlock &MBB) const {
291  assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
292  MachineBasicBlock::iterator MBBI = MBB.begin();
293  MachineFrameInfo *MFI = MF.getFrameInfo();
294  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
295  MachineModuleInfo &MMI = MF.getMMI();
296  MCContext &Context = MMI.getContext();
297  const TargetMachine &TM = MF.getTarget();
298  const MCRegisterInfo *MRI = Context.getRegisterInfo();
299  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
300  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
301  assert(!AFI->isThumb1OnlyFunction() &&
302  "This emitPrologue does not support Thumb1!");
303  bool isARM = !AFI->isThumbFunction();
304  unsigned Align = STI.getFrameLowering()->getStackAlignment();
305  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
306  unsigned NumBytes = MFI->getStackSize();
307  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
308  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
309  unsigned FramePtr = RegInfo->getFrameRegister(MF);
310 
311  // Determine the size of each callee-save spill area and record which
312  // frame index belongs to which area.
313  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
314  int FramePtrSpillFI = 0;
315  int D8SpillFI = 0;
316 
317  // All calls are tail calls in GHC calling conv, and functions have no
318  // prologue/epilogue.
319  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
320  return;
321 
322  StackAdjustingInsts DefCFAOffsetCandidates;
323  bool HasFP = hasFP(MF);
324 
325  // Allocate the vararg register save area.
326  if (ArgRegsSaveSize) {
327  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
328  MachineInstr::FrameSetup);
329  DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
330  }
331 
332  if (!AFI->hasStackFrame() &&
333  (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
334  if (NumBytes - ArgRegsSaveSize != 0) {
335  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
336  MachineInstr::FrameSetup);
337  DefCFAOffsetCandidates.addInst(std::prev(MBBI),
338  NumBytes - ArgRegsSaveSize, true);
339  }
340  DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
341  return;
342  }
343 
344  // Determine spill area sizes.
345  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
346  unsigned Reg = CSI[i].getReg();
347  int FI = CSI[i].getFrameIdx();
348  switch (Reg) {
349  case ARM::R8:
350  case ARM::R9:
351  case ARM::R10:
352  case ARM::R11:
353  case ARM::R12:
354  if (STI.isTargetDarwin()) {
355  GPRCS2Size += 4;
356  break;
357  }
358  // fallthrough
359  case ARM::R0:
360  case ARM::R1:
361  case ARM::R2:
362  case ARM::R3:
363  case ARM::R4:
364  case ARM::R5:
365  case ARM::R6:
366  case ARM::R7:
367  case ARM::LR:
368  if (Reg == FramePtr)
369  FramePtrSpillFI = FI;
370  GPRCS1Size += 4;
371  break;
372  default:
373  // This is a DPR. Exclude the aligned DPRCS2 spills.
374  if (Reg == ARM::D8)
375  D8SpillFI = FI;
376  if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
377  DPRCSSize += 8;
378  }
379  }
380 
381  // Move past area 1.
382  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
383  if (GPRCS1Size > 0) {
384  GPRCS1Push = LastPush = MBBI++;
385  DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
386  }
387 
388  // Determine starting offsets of spill areas.
389  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
390  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
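  // DPR spills are kept 8-byte aligned when the overall stack alignment allows
  // it; DPRGapSize is the padding needed between the GPR spill areas and the
  // DPR (vpush) area so that the latter lands on that boundary.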
391  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
392  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
393  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
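  // When a frame pointer is used, remember where its spill slot sits inside
  // the first GPR push so FP can be pointed at it once the push has executed.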
394  int FramePtrOffsetInPush = 0;
395  if (HasFP) {
396  FramePtrOffsetInPush =
397  MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
398  AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
399  NumBytes);
400  }
401  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
402  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
403  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
404 
405  // Move past area 2.
406  if (GPRCS2Size > 0) {
407  GPRCS2Push = LastPush = MBBI++;
408  DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
409  }
410 
411  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
412  // .cfi_offset operations will reflect that.
413  if (DPRGapSize) {
414  assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
415  if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize))
416  DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
417  else {
418  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
419  MachineInstr::FrameSetup);
420  DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
421  }
422  }
423 
424  // Move past area 3.
425  if (DPRCSSize > 0) {
426  // Since vpush register list cannot have gaps, there may be multiple vpush
427  // instructions in the prologue.
428  while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
429  DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI));
430  LastPush = MBBI++;
431  }
432  }
433 
434  // Move past the aligned DPRCS2 area.
435  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
436  MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
437  // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
438  // leaves the stack pointer pointing to the DPRCS2 area.
439  //
440  // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
441  NumBytes += MFI->getObjectOffset(D8SpillFI);
442  } else
443  NumBytes = DPRCSOffset;
444 
445  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
446  uint32_t NumWords = NumBytes >> 2;
447 
448  if (NumWords < 65536)
449  AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
450  .addImm(NumWords)
451  .setMIFlags(MachineInstr::FrameSetup));
452  else
453  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
454  .addImm(NumWords)
455  .setMIFlags(MachineInstr::FrameSetup);
456 
457  switch (TM.getCodeModel()) {
458  case CodeModel::Small:
459  case CodeModel::Medium:
460  case CodeModel::Default:
461  case CodeModel::Kernel:
462  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
463  .addImm((unsigned)ARMCC::AL).addReg(0)
464  .addExternalSymbol("__chkstk")
465  .addReg(ARM::R4, RegState::Implicit)
466  .setMIFlags(MachineInstr::FrameSetup);
467  break;
468  case CodeModel::Large:
469  case CodeModel::JITDefault:
470  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
471  .addExternalSymbol("__chkstk")
472  .setMIFlags(MachineInstr::FrameSetup);
473 
474  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
475  .addImm((unsigned)ARMCC::AL).addReg(0)
476  .addReg(ARM::R12, RegState::Kill)
477  .addReg(ARM::R4, RegState::Implicit)
478  .setMIFlags(MachineInstr::FrameSetup);
479  break;
480  }
481 
482  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
483  ARM::SP)
484  .addReg(ARM::SP, RegState::Define)
485  .addReg(ARM::R4, RegState::Kill)
486  .setMIFlags(MachineInstr::FrameSetup)));
487  NumBytes = 0;
488  }
489 
490  if (NumBytes) {
491  // Adjust SP after all the callee-save spills.
492  if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
493  DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
494  else {
495  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
496  MachineInstr::FrameSetup);
497  DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
498  }
499 
500  if (HasFP && isARM)
501  // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
502  // Note it's not safe to do this in Thumb2 mode because it would have
503  // taken two instructions:
504  // mov sp, r7
505  // sub sp, #24
506  // If an interrupt is taken between the two instructions, then sp is in
507  // an inconsistent state (pointing to the middle of callee-saved area).
508  // The interrupt handler can end up clobbering the registers.
509  AFI->setShouldRestoreSPFromFP(true);
510  }
511 
512  // Set FP to point to the stack slot that contains the previous FP.
513  // For iOS, FP is R7, which has now been stored in spill area 1.
514  // Otherwise, if this is not iOS, all the callee-saved registers go
515  // into spill area 1, including the FP in R11. In either case, it
516  // is in area one and the adjustment needs to take place just after
517  // that push.
518  if (HasFP) {
519  MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
520  unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push);
521  emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
522  dl, TII, FramePtr, ARM::SP,
523  PushSize + FramePtrOffsetInPush,
524  MachineInstr::FrameSetup);
525  if (FramePtrOffsetInPush + PushSize != 0) {
526  unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
527  nullptr, MRI->getDwarfRegNum(FramePtr, true),
528  -(ArgRegsSaveSize - FramePtrOffsetInPush)));
529  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
530  .addCFIIndex(CFIIndex)
531  .setMIFlags(MachineInstr::FrameSetup);
532  } else {
533  unsigned CFIIndex =
534  MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
535  nullptr, MRI->getDwarfRegNum(FramePtr, true)));
536  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
537  .addCFIIndex(CFIIndex)
538  .setMIFlags(MachineInstr::FrameSetup);
539  }
540  }
541 
542  // Now that the prologue's actual instructions are finalised, we can insert
543  // the necessary DWARF cf instructions to describe the situation. Start by
544  // recording where each register ended up:
545  if (GPRCS1Size > 0) {
546  MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
547  int CFIIndex;
548  for (const auto &Entry : CSI) {
549  unsigned Reg = Entry.getReg();
550  int FI = Entry.getFrameIdx();
551  switch (Reg) {
552  case ARM::R8:
553  case ARM::R9:
554  case ARM::R10:
555  case ARM::R11:
556  case ARM::R12:
557  if (STI.isTargetDarwin())
558  break;
559  // fallthrough
560  case ARM::R0:
561  case ARM::R1:
562  case ARM::R2:
563  case ARM::R3:
564  case ARM::R4:
565  case ARM::R5:
566  case ARM::R6:
567  case ARM::R7:
568  case ARM::LR:
569  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
570  nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
571  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
572  .addCFIIndex(CFIIndex)
573  .setMIFlags(MachineInstr::FrameSetup);
574  break;
575  }
576  }
577  }
578 
579  if (GPRCS2Size > 0) {
580  MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
581  for (const auto &Entry : CSI) {
582  unsigned Reg = Entry.getReg();
583  int FI = Entry.getFrameIdx();
584  switch (Reg) {
585  case ARM::R8:
586  case ARM::R9:
587  case ARM::R10:
588  case ARM::R11:
589  case ARM::R12:
590  if (STI.isTargetDarwin()) {
591  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
592  unsigned Offset = MFI->getObjectOffset(FI);
593  unsigned CFIIndex = MMI.addFrameInst(
594  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
595  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
596  .addCFIIndex(CFIIndex)
597  .setMIFlags(MachineInstr::FrameSetup);
598  }
599  break;
600  }
601  }
602  }
603 
604  if (DPRCSSize > 0) {
605  // Since vpush register list cannot have gaps, there may be multiple vpush
606  // instructions in the prologue.
607  MachineBasicBlock::iterator Pos = std::next(LastPush);
608  for (const auto &Entry : CSI) {
609  unsigned Reg = Entry.getReg();
610  int FI = Entry.getFrameIdx();
611  if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
612  (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
613  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
614  unsigned Offset = MFI->getObjectOffset(FI);
615  unsigned CFIIndex = MMI.addFrameInst(
616  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
617  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
618  .addCFIIndex(CFIIndex)
619  .setMIFlags(MachineInstr::FrameSetup);
620  }
621  }
622  }
623 
624  // Now we can emit descriptions of where the canonical frame address was
625  // throughout the process. If we have a frame pointer, it takes over the job
626  // half-way through, so only the first few .cfi_def_cfa_offset instructions
627  // actually get emitted.
628  DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
629 
630  if (STI.isTargetELF() && hasFP(MF))
631  MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
632  AFI->getFramePtrSpillOffset());
633 
634  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
635  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
636  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
637  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
638 
639  // If we need dynamic stack realignment, do it here. Be paranoid and make
640  // sure if we also have VLAs, we have a base pointer for frame access.
641  // If aligned NEON registers were spilled, the stack has already been
642  // realigned.
643  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
644  unsigned MaxAlign = MFI->getMaxAlignment();
645  assert(!AFI->isThumb1OnlyFunction());
646  if (!AFI->isThumbFunction()) {
647  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
648  false);
649  } else {
650  // We cannot use sp as source/dest register here, thus we're using r4 to
651  // perform the calculations. We're emitting the following sequence:
652  // mov r4, sp
653  // -- use emitAligningInstructions to produce best sequence to zero
654  // -- out lower bits in r4
655  // mov sp, r4
656  // FIXME: It will be better just to find spare register here.
657  AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
658  .addReg(ARM::SP, RegState::Kill));
659  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
660  false);
661  AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
662  .addReg(ARM::R4, RegState::Kill));
663  }
664 
665  AFI->setShouldRestoreSPFromFP(true);
666  }
667 
668  // If we need a base pointer, set it up here. It's whatever the value
669  // of the stack pointer is at this point. Any variable size objects
670  // will be allocated after this, so we can still use the base pointer
671  // to reference locals.
672  // FIXME: Clarify FrameSetup flags here.
673  if (RegInfo->hasBasePointer(MF)) {
674  if (isARM)
675  BuildMI(MBB, MBBI, dl,
676  TII.get(ARM::MOVr), RegInfo->getBaseRegister())
677  .addReg(ARM::SP)
678  .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
679  else
680  AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
681  RegInfo->getBaseRegister())
682  .addReg(ARM::SP));
683  }
684 
685  // If the frame has variable sized objects then the epilogue must restore
686  // the sp from fp. We can assume there's an FP here since hasFP already
687  // checks for hasVarSizedObjects.
688  if (MFI->hasVarSizedObjects())
689  AFI->setShouldRestoreSPFromFP(true);
690 }
691 
692 // Resolve TCReturn pseudo-instruction
693 void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
694  MachineBasicBlock &MBB) const {
695  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
696  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
697  unsigned RetOpcode = MBBI->getOpcode();
698  DebugLoc dl = MBBI->getDebugLoc();
699  const ARMBaseInstrInfo &TII =
700  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
701 
702  if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
703  return;
704 
705  // Tail call return: adjust the stack pointer and jump to callee.
706  MBBI = MBB.getLastNonDebugInstr();
707  MachineOperand &JumpTarget = MBBI->getOperand(0);
708 
709  // Jump to label or value in register.
710  if (RetOpcode == ARM::TCRETURNdi) {
711  unsigned TCOpcode = STI.isThumb() ?
712  (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
713  ARM::TAILJMPd;
714  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
715  if (JumpTarget.isGlobal())
716  MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
717  JumpTarget.getTargetFlags());
718  else {
719  assert(JumpTarget.isSymbol());
720  MIB.addExternalSymbol(JumpTarget.getSymbolName(),
721  JumpTarget.getTargetFlags());
722  }
723 
724  // Add the default predicate in Thumb mode.
725  if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
726  } else if (RetOpcode == ARM::TCRETURNri) {
727  BuildMI(MBB, MBBI, dl,
728  TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
729  addReg(JumpTarget.getReg(), RegState::Kill);
730  }
731 
732  MachineInstr *NewMI = std::prev(MBBI);
733  for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
734  NewMI->addOperand(MBBI->getOperand(i));
735 
736  // Delete the pseudo instruction TCRETURN.
737  MBB.erase(MBBI);
738  MBBI = NewMI;
739 }
740 
741 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
742  MachineBasicBlock &MBB) const {
743  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
744  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
745  DebugLoc dl = MBBI->getDebugLoc();
746  MachineFrameInfo *MFI = MF.getFrameInfo();
747  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
748  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
749  const ARMBaseInstrInfo &TII =
750  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
751  assert(!AFI->isThumb1OnlyFunction() &&
752  "This emitEpilogue does not support Thumb1!");
753  bool isARM = !AFI->isThumbFunction();
754 
755  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
756  int NumBytes = (int)MFI->getStackSize();
757  unsigned FramePtr = RegInfo->getFrameRegister(MF);
758 
759  // All calls are tail calls in GHC calling conv, and functions have no
760  // prologue/epilogue.
761  if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
762  fixTCReturn(MF, MBB);
763  return;
764  }
765 
766  if (!AFI->hasStackFrame()) {
767  if (NumBytes - ArgRegsSaveSize != 0)
768  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
769  } else {
770  // Unwind MBBI to point to first LDR / VLDRD.
771  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
772  if (MBBI != MBB.begin()) {
773  do {
774  --MBBI;
775  } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
776  if (!isCSRestore(MBBI, TII, CSRegs))
777  ++MBBI;
778  }
779 
780  // Move SP to start of FP callee save spill area.
781  NumBytes -= (ArgRegsSaveSize +
782  AFI->getGPRCalleeSavedArea1Size() +
783  AFI->getGPRCalleeSavedArea2Size() +
784  AFI->getDPRCalleeSavedGapSize() +
785  AFI->getDPRCalleeSavedAreaSize());
786 
787  // Reset SP based on frame pointer only if the stack frame extends beyond
788  // frame pointer stack slot or target is ELF and the function has FP.
789  if (AFI->shouldRestoreSPFromFP()) {
790  NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
791  if (NumBytes) {
792  if (isARM)
793  emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
794  ARMCC::AL, 0, TII);
795  else {
796  // It's not possible to restore SP from FP in a single instruction.
797  // For iOS, this looks like:
798  // mov sp, r7
799  // sub sp, #24
800  // This is bad, if an interrupt is taken after the mov, sp is in an
801  // inconsistent state.
802  // Use the first callee-saved register as a scratch register.
803  assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
804  "No scratch register to restore SP from FP!");
805  emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
806  ARMCC::AL, 0, TII);
807  AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
808  ARM::SP)
809  .addReg(ARM::R4));
810  }
811  } else {
812  // Thumb2 or ARM.
813  if (isARM)
814  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
815  .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
816  else
817  AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
818  ARM::SP)
819  .addReg(FramePtr));
820  }
821  } else if (NumBytes &&
822  !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
823  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
824 
825  // Increment past our save areas.
826  if (AFI->getDPRCalleeSavedAreaSize()) {
827  MBBI++;
828  // Since vpop register list cannot have gaps, there may be multiple vpop
829  // instructions in the epilogue.
830  while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
831  MBBI++;
832  }
833  if (AFI->getDPRCalleeSavedGapSize()) {
834  assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
835  "unexpected DPR alignment gap");
836  emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
837  }
838 
839  if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
840  if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
841  }
842 
843  fixTCReturn(MF, MBB);
844 
845  if (ArgRegsSaveSize)
846  emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
847 }
848 
849 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
850 /// debug info. It's the same as what we use for resolving the code-gen
851 /// references for now. FIXME: This can go wrong when references are
852 /// SP-relative and simple call frames aren't used.
853 int
854 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
855  unsigned &FrameReg) const {
856  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
857 }
858 
859 int
860 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
861  int FI, unsigned &FrameReg,
862  int SPAdj) const {
863  const MachineFrameInfo *MFI = MF.getFrameInfo();
864  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
865  MF.getSubtarget().getRegisterInfo());
866  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
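  // Offset is the slot's distance from SP after the prologue's full stack
  // adjustment; FPOffset is the same slot measured from the frame pointer's
  // spill position instead.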
867  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
868  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
869  bool isFixed = MFI->isFixedObjectIndex(FI);
870 
871  FrameReg = ARM::SP;
872  Offset += SPAdj;
873 
874  // SP can move around if there are allocas. We may also lose track of SP
875  // when emergency spilling inside a non-reserved call frame setup.
876  bool hasMovingSP = !hasReservedCallFrame(MF);
877 
878  // When dynamically realigning the stack, use the frame pointer for
879  // parameters, and the stack/base pointer for locals.
880  if (RegInfo->needsStackRealignment(MF)) {
881  assert (hasFP(MF) && "dynamic stack realignment without a FP!");
882  if (isFixed) {
883  FrameReg = RegInfo->getFrameRegister(MF);
884  Offset = FPOffset;
885  } else if (hasMovingSP) {
886  assert(RegInfo->hasBasePointer(MF) &&
887  "VLAs and dynamic stack alignment, but missing base pointer!");
888  FrameReg = RegInfo->getBaseRegister();
889  }
890  return Offset;
891  }
892 
893  // If there is a frame pointer, use it when we can.
894  if (hasFP(MF) && AFI->hasStackFrame()) {
895  // Use frame pointer to reference fixed objects. Use it for locals if
896  // there are VLAs (and thus the SP isn't reliable as a base).
897  if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
898  FrameReg = RegInfo->getFrameRegister(MF);
899  return FPOffset;
900  } else if (hasMovingSP) {
901  assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
902  if (AFI->isThumb2Function()) {
903  // Try to use the frame pointer if we can, else use the base pointer
904  // since it's available. This is handy for the emergency spill slot, in
905  // particular.
906  if (FPOffset >= -255 && FPOffset < 0) {
907  FrameReg = RegInfo->getFrameRegister(MF);
908  return FPOffset;
909  }
910  }
911  } else if (AFI->isThumb2Function()) {
912  // Use add <rd>, sp, #<imm8>
913  // ldr <rd>, [sp, #<imm8>]
914  // if at all possible to save space.
915  if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
916  return Offset;
917  // In Thumb2 mode, the negative offset is very limited. Try to avoid
918  // out of range references. ldr <rt>,[<rn>, #-<imm8>]
919  if (FPOffset >= -255 && FPOffset < 0) {
920  FrameReg = RegInfo->getFrameRegister(MF);
921  return FPOffset;
922  }
923  } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
924  // Otherwise, use SP or FP, whichever is closer to the stack slot.
925  FrameReg = RegInfo->getFrameRegister(MF);
926  return FPOffset;
927  }
928  }
929  // Use the base pointer if we have one.
930  if (RegInfo->hasBasePointer(MF))
931  FrameReg = RegInfo->getBaseRegister();
932  return Offset;
933 }
934 
935 int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
936  int FI) const {
937  unsigned FrameReg;
938  return getFrameIndexReference(MF, FI, FrameReg);
939 }
940 
941 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
942  MachineBasicBlock::iterator MI,
943  const std::vector<CalleeSavedInfo> &CSI,
944  unsigned StmOpc, unsigned StrOpc,
945  bool NoGap,
946  bool(*Func)(unsigned, bool),
947  unsigned NumAlignedDPRCS2Regs,
948  unsigned MIFlags) const {
949  MachineFunction &MF = *MBB.getParent();
950  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
951 
952  DebugLoc DL;
953  if (MI != MBB.end()) DL = MI->getDebugLoc();
954 
955  SmallVector<std::pair<unsigned,bool>, 4> Regs;
956  unsigned i = CSI.size();
957  while (i != 0) {
958  unsigned LastReg = 0;
959  for (; i != 0; --i) {
960  unsigned Reg = CSI[i-1].getReg();
961  if (!(Func)(Reg, STI.isTargetDarwin())) continue;
962 
963  // D-registers in the aligned area DPRCS2 are NOT spilled here.
964  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
965  continue;
966 
967  // Add the callee-saved register as live-in unless it's LR and
968  // @llvm.returnaddress is called. If LR is returned for
969  // @llvm.returnaddress then it's already added to the function and
970  // entry block live-in sets.
971  bool isKill = true;
972  if (Reg == ARM::LR) {
973  if (MF.getFrameInfo()->isReturnAddressTaken() &&
974  MF.getRegInfo().isLiveIn(Reg))
975  isKill = false;
976  }
977 
978  if (isKill)
979  MBB.addLiveIn(Reg);
980 
981  // If NoGap is true, push consecutive registers and then leave the rest
982  // for other instructions. e.g.
983  // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
984  if (NoGap && LastReg && LastReg != Reg-1)
985  break;
986  LastReg = Reg;
987  Regs.push_back(std::make_pair(Reg, isKill));
988  }
989 
990  if (Regs.empty())
991  continue;
992  if (Regs.size() > 1 || StrOpc== 0) {
993  MachineInstrBuilder MIB =
994  AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
995  .addReg(ARM::SP).setMIFlags(MIFlags));
996  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
997  MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
998  } else if (Regs.size() == 1) {
999  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
1000  ARM::SP)
1001  .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1002  .addReg(ARM::SP).setMIFlags(MIFlags)
1003  .addImm(-4);
1004  AddDefaultPred(MIB);
1005  }
1006  Regs.clear();
1007 
1008  // Put any subsequent vpush instructions before this one: they will refer to
1009  // higher register numbers so need to be pushed first in order to preserve
1010  // monotonicity.
1011  --MI;
1012  }
1013 }
1014 
1015 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1016  MachineBasicBlock::iterator MI,
1017  const std::vector<CalleeSavedInfo> &CSI,
1018  unsigned LdmOpc, unsigned LdrOpc,
1019  bool isVarArg, bool NoGap,
1020  bool(*Func)(unsigned, bool),
1021  unsigned NumAlignedDPRCS2Regs) const {
1022  MachineFunction &MF = *MBB.getParent();
1023  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1024  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1025  DebugLoc DL = MI->getDebugLoc();
1026  unsigned RetOpcode = MI->getOpcode();
1027  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
1028  RetOpcode == ARM::TCRETURNri);
1029  bool isInterrupt =
1030  RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1031 
1032  SmallVector<unsigned, 4> Regs;
1033  unsigned i = CSI.size();
1034  while (i != 0) {
1035  unsigned LastReg = 0;
1036  bool DeleteRet = false;
1037  for (; i != 0; --i) {
1038  unsigned Reg = CSI[i-1].getReg();
1039  if (!(Func)(Reg, STI.isTargetDarwin())) continue;
1040 
1041  // The aligned reloads from area DPRCS2 are not inserted here.
1042  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1043  continue;
1044 
1045  if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1046  STI.hasV5TOps()) {
1047  Reg = ARM::PC;
1048  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1049  // Fold the return instruction into the LDM.
1050  DeleteRet = true;
1051  }
1052 
1053  // If NoGap is true, pop consecutive registers and then leave the rest
1054  // for other instructions. e.g.
1055  // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1056  if (NoGap && LastReg && LastReg != Reg-1)
1057  break;
1058 
1059  LastReg = Reg;
1060  Regs.push_back(Reg);
1061  }
1062 
1063  if (Regs.empty())
1064  continue;
1065  if (Regs.size() > 1 || LdrOpc == 0) {
1066  MachineInstrBuilder MIB =
1067  AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1068  .addReg(ARM::SP));
1069  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1070  MIB.addReg(Regs[i], getDefRegState(true));
1071  if (DeleteRet) {
1072  MIB.copyImplicitOps(&*MI);
1073  MI->eraseFromParent();
1074  }
1075  MI = MIB;
1076  } else if (Regs.size() == 1) {
1077  // If we adjusted the reg to PC from LR above, switch it back here. We
1078  // only do that for LDM.
1079  if (Regs[0] == ARM::PC)
1080  Regs[0] = ARM::LR;
1081  MachineInstrBuilder MIB =
1082  BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1083  .addReg(ARM::SP, RegState::Define)
1084  .addReg(ARM::SP);
1085  // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1086  // that refactoring is complete (eventually).
1087  if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1088  MIB.addReg(0);
1089  MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1090  } else
1091  MIB.addImm(4);
1092  AddDefaultPred(MIB);
1093  }
1094  Regs.clear();
1095 
1096  // Put any subsequent vpop instructions after this one: they will refer to
1097  // higher register numbers so need to be popped afterwards.
1098  ++MI;
1099  }
1100 }
1101 
1102 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1103 /// starting from d8. Also insert stack realignment code and leave the stack
1104 /// pointer pointing to the d8 spill slot.
1105 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1106  MachineBasicBlock::iterator MI,
1107  unsigned NumAlignedDPRCS2Regs,
1108  const std::vector<CalleeSavedInfo> &CSI,
1109  const TargetRegisterInfo *TRI) {
1110  MachineFunction &MF = *MBB.getParent();
1111  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1112  DebugLoc DL = MI->getDebugLoc();
1113  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1114  MachineFrameInfo &MFI = *MF.getFrameInfo();
1115 
1116  // Mark the D-register spill slots as properly aligned. Since MFI computes
1117  // stack slot layout backwards, this can actually mean that the d-reg stack
1118  // slot offsets can be wrong. The offset for d8 will always be correct.
1119  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1120  unsigned DNum = CSI[i].getReg() - ARM::D8;
1121  if (DNum >= 8)
1122  continue;
1123  int FI = CSI[i].getFrameIdx();
1124  // The even-numbered registers will be 16-byte aligned, the odd-numbered
1125  // registers will be 8-byte aligned.
1126  MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
1127 
1128  // The stack slot for D8 needs to be maximally aligned because this is
1129  // actually the point where we align the stack pointer. MachineFrameInfo
1130  // computes all offsets relative to the incoming stack pointer which is a
1131  // bit weird when realigning the stack. Any extra padding for this
1132  // over-alignment is not realized because the code inserted below adjusts
1133  // the stack pointer by numregs * 8 before aligning the stack pointer.
1134  if (DNum == 0)
1135  MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
1136  }
1137 
1138  // Move the stack pointer to the d8 spill slot, and align it at the same
1139  // time. Leave the stack slot address in the scratch register r4.
1140  //
1141  // sub r4, sp, #numregs * 8
1142  // bic r4, r4, #align - 1
1143  // mov sp, r4
1144  //
1145  bool isThumb = AFI->isThumbFunction();
1146  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1147  AFI->setShouldRestoreSPFromFP(true);
1148 
1149  // sub r4, sp, #numregs * 8
1150  // The immediate is <= 64, so it doesn't need any special encoding.
1151  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1152  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1153  .addReg(ARM::SP)
1154  .addImm(8 * NumAlignedDPRCS2Regs)));
1155 
1156  unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
1157  // We must set parameter MustBeSingleInstruction to true, since
1158  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1159  // stack alignment. Luckily, this can always be done since all ARM
1160  // architecture versions that support Neon also support the BFC
1161  // instruction.
1162  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1163 
1164  // mov sp, r4
1165  // The stack pointer must be adjusted before spilling anything, otherwise
1166  // the stack slots could be clobbered by an interrupt handler.
1167  // Leave r4 live, it is used below.
1168  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1169  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1170  .addReg(ARM::R4);
1171  MIB = AddDefaultPred(MIB);
1172  if (!isThumb)
1173  AddDefaultCC(MIB);
1174 
1175  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1176  // r4 holds the stack slot address.
1177  unsigned NextReg = ARM::D8;
1178 
1179  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1180  // The writeback is only needed when emitting two vst1.64 instructions.
1181  if (NumAlignedDPRCS2Regs >= 6) {
1182  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1183  &ARM::QQPRRegClass);
1184  MBB.addLiveIn(SupReg);
1185  AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
1186  ARM::R4)
1187  .addReg(ARM::R4, RegState::Kill).addImm(16)
1188  .addReg(NextReg)
1189  .addReg(SupReg, RegState::ImplicitKill));
1190  NextReg += 4;
1191  NumAlignedDPRCS2Regs -= 4;
1192  }
1193 
1194  // We won't modify r4 beyond this point. It currently points to the next
1195  // register to be spilled.
1196  unsigned R4BaseReg = NextReg;
1197 
1198  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1199  if (NumAlignedDPRCS2Regs >= 4) {
1200  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1201  &ARM::QQPRRegClass);
1202  MBB.addLiveIn(SupReg);
1203  AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1204  .addReg(ARM::R4).addImm(16).addReg(NextReg)
1205  .addReg(SupReg, RegState::ImplicitKill));
1206  NextReg += 4;
1207  NumAlignedDPRCS2Regs -= 4;
1208  }
1209 
1210  // 16-byte aligned vst1.64 with 2 d-regs.
1211  if (NumAlignedDPRCS2Regs >= 2) {
1212  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1213  &ARM::QPRRegClass);
1214  MBB.addLiveIn(SupReg);
1215  AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1216  .addReg(ARM::R4).addImm(16).addReg(SupReg));
1217  NextReg += 2;
1218  NumAlignedDPRCS2Regs -= 2;
1219  }
1220 
1221  // Finally, use a vanilla vstr.64 for the odd last register.
1222  if (NumAlignedDPRCS2Regs) {
1223  MBB.addLiveIn(NextReg);
1224  // vstr.64 uses addrmode5 which has an offset scale of 4.
1225  AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1226  .addReg(NextReg)
1227  .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
1228  }
1229 
1230  // The last spill instruction inserted should kill the scratch register r4.
1231  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1232 }
1233 
1234 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1235 /// iterator to the following instruction.
1236 static MachineBasicBlock::iterator
1237 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1238  unsigned NumAlignedDPRCS2Regs) {
1239  // sub r4, sp, #numregs * 8
1240  // bic r4, r4, #align - 1
1241  // mov sp, r4
1242  ++MI; ++MI; ++MI;
1243  assert(MI->mayStore() && "Expecting spill instruction");
1244 
1245  // These switches all fall through.
1246  switch(NumAlignedDPRCS2Regs) {
1247  case 7:
1248  ++MI;
1249  assert(MI->mayStore() && "Expecting spill instruction");
1250  default:
1251  ++MI;
1252  assert(MI->mayStore() && "Expecting spill instruction");
1253  case 1:
1254  case 2:
1255  case 4:
1256  assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1257  ++MI;
1258  }
1259  return MI;
1260 }
1261 
1262 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1263 /// starting from d8. These instructions are assumed to execute while the
1264 /// stack is still aligned, unlike the code inserted by emitPopInst.
1265 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1266  MachineBasicBlock::iterator MI,
1267  unsigned NumAlignedDPRCS2Regs,
1268  const std::vector<CalleeSavedInfo> &CSI,
1269  const TargetRegisterInfo *TRI) {
1270  MachineFunction &MF = *MBB.getParent();
1271  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1272  DebugLoc DL = MI->getDebugLoc();
1273  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1274 
1275  // Find the frame index assigned to d8.
1276  int D8SpillFI = 0;
1277  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1278  if (CSI[i].getReg() == ARM::D8) {
1279  D8SpillFI = CSI[i].getFrameIdx();
1280  break;
1281  }
1282 
1283  // Materialize the address of the d8 spill slot into the scratch register r4.
1284  // This can be fairly complicated if the stack frame is large, so just use
1285  // the normal frame index elimination mechanism to do it. This code runs as
1286  // the initial part of the epilog where the stack and base pointers haven't
1287  // been changed yet.
1288  bool isThumb = AFI->isThumbFunction();
1289  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1290 
1291  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1292  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1293  .addFrameIndex(D8SpillFI).addImm(0)));
1294 
1295  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1296  unsigned NextReg = ARM::D8;
1297 
1298  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1299  if (NumAlignedDPRCS2Regs >= 6) {
1300  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1301  &ARM::QQPRRegClass);
1302  AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1303  .addReg(ARM::R4, RegState::Define)
1304  .addReg(ARM::R4, RegState::Kill).addImm(16)
1305  .addReg(SupReg, RegState::ImplicitDefine));
1306  NextReg += 4;
1307  NumAlignedDPRCS2Regs -= 4;
1308  }
1309 
1310  // We won't modify r4 beyond this point. It currently points to the next
1311  // register to be spilled.
1312  unsigned R4BaseReg = NextReg;
1313 
1314  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1315  if (NumAlignedDPRCS2Regs >= 4) {
1316  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1317  &ARM::QQPRRegClass);
1318  AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1319  .addReg(ARM::R4).addImm(16)
1320  .addReg(SupReg, RegState::ImplicitDefine));
1321  NextReg += 4;
1322  NumAlignedDPRCS2Regs -= 4;
1323  }
1324 
1325  // 16-byte aligned vld1.64 with 2 d-regs.
1326  if (NumAlignedDPRCS2Regs >= 2) {
1327  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1328  &ARM::QPRRegClass);
1329  AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1330  .addReg(ARM::R4).addImm(16));
1331  NextReg += 2;
1332  NumAlignedDPRCS2Regs -= 2;
1333  }
1334 
1335  // Finally, use a vanilla vldr.64 for the remaining odd register.
1336  if (NumAlignedDPRCS2Regs)
1337  AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1338  .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
1339 
1340  // The last reload kills the scratch register r4.
1341  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1342 }
1343 
1344 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
1345  MachineBasicBlock::iterator MI,
1346  const std::vector<CalleeSavedInfo> &CSI,
1347  const TargetRegisterInfo *TRI) const {
1348  if (CSI.empty())
1349  return false;
1350 
1351  MachineFunction &MF = *MBB.getParent();
1352  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1353 
1354  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1355  unsigned PushOneOpc = AFI->isThumbFunction() ?
1356  ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1357  unsigned FltOpc = ARM::VSTMDDB_UPD;
1358  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1359  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1360  MachineInstr::FrameSetup);
1361  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1362  MachineInstr::FrameSetup);
1363  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1364  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1365 
1366  // The code above does not insert spill code for the aligned DPRCS2 registers.
1367  // The stack realignment code will be inserted between the push instructions
1368  // and these spills.
1369  if (NumAlignedDPRCS2Regs)
1370  emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1371 
1372  return true;
1373 }
1374 
1375 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1376  MachineBasicBlock::iterator MI,
1377  const std::vector<CalleeSavedInfo> &CSI,
1378  const TargetRegisterInfo *TRI) const {
1379  if (CSI.empty())
1380  return false;
1381 
1382  MachineFunction &MF = *MBB.getParent();
1383  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1384  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1385  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1386 
1387  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1388  // registers. Do that here instead.
1389  if (NumAlignedDPRCS2Regs)
1390  emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1391 
1392  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1393  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1394  unsigned FltOpc = ARM::VLDMDIA_UPD;
1395  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1396  NumAlignedDPRCS2Regs);
1397  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1398  &isARMArea2Register, 0);
1399  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1400  &isARMArea1Register, 0);
1401 
1402  return true;
1403 }
1404 
1405 // FIXME: Make generic?
1406 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
1407  const ARMBaseInstrInfo &TII) {
1408  unsigned FnSize = 0;
1409  for (auto &MBB : MF) {
1410  for (auto &MI : MBB)
1411  FnSize += TII.GetInstSizeInBytes(&MI);
1412  }
1413  return FnSize;
1414 }
1415 
1416 /// estimateRSStackSizeLimit - Look at each instruction that references stack
1417 /// frames and return the stack size limit beyond which some of these
1418 /// instructions will require a scratch register during their expansion later.
1419 // FIXME: Move to TII?
1420 static unsigned estimateRSStackSizeLimit(const MachineFunction &MF,
1421  const TargetFrameLowering *TFI) {
1422  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
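  // Start from the ARM-mode 12-bit immediate limit (4095) and tighten it as
  // more restrictive addressing modes are encountered below.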
1423  unsigned Limit = (1 << 12) - 1;
1424  for (auto &MBB : MF) {
1425  for (auto &MI : MBB) {
1426  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1427  if (!MI.getOperand(i).isFI())
1428  continue;
1429 
1430  // When using ADDri to get the address of a stack object, 255 is the
1431  // largest offset guaranteed to fit in the immediate offset.
1432  if (MI.getOpcode() == ARM::ADDri) {
1433  Limit = std::min(Limit, (1U << 8) - 1);
1434  break;
1435  }
1436 
1437  // Otherwise check the addressing mode.
1438  switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
1439  case ARMII::AddrMode3:
1440  case ARMII::AddrModeT2_i8:
1441  Limit = std::min(Limit, (1U << 8) - 1);
1442  break;
1443  case ARMII::AddrMode5:
1444  case ARMII::AddrModeT2_i8s4:
1445  Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1446  break;
1447  case ARMII::AddrModeT2_i12:
1448  // i12 supports only positive offset so these will be converted to
1449  // i8 opcodes. See llvm::rewriteT2FrameIndex.
1450  if (TFI->hasFP(MF) && AFI->hasStackFrame())
1451  Limit = std::min(Limit, (1U << 8) - 1);
1452  break;
1453  case ARMII::AddrMode4:
1454  case ARMII::AddrMode6:
1455  // Addressing modes 4 & 6 (load/store) instructions can't encode an
1456  // immediate offset for stack references.
1457  return 0;
1458  default:
1459  break;
1460  }
1461  break; // At most one FI per instruction
1462  }
1463  }
1464  }
1465 
1466  return Limit;
1467 }
1468 
1469 // In functions that realign the stack, it can be an advantage to spill the
1470 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1471 // instructions take alignment hints that can improve performance.
1472 //
1473 static void
1474 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
1475  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1476  if (!SpillAlignedNEONRegs)
1477  return;
1478 
1479  // Naked functions don't spill callee-saved registers.
1480  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
1481  return;
1482 
1483  // We are planning to use NEON instructions vst1 / vld1.
1484  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
1485  return;
1486 
1487  // Don't bother if the default stack alignment is sufficiently high.
1488  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
1489  return;
1490 
1491  // Aligned spills require stack realignment.
1492  if (!static_cast<const ARMBaseRegisterInfo *>(
1493  MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
1494  return;
1495 
1496  // We always spill contiguous d-registers starting from d8. Count how many
1497  // need spilling. The register allocator will almost always use the
1498  // callee-saved registers in order, but it can happen that there are holes in
1499  // the range. Registers above the hole will be spilled to the standard DPRCS
1500  // area.
1501  unsigned NumSpills = 0;
1502  for (; NumSpills < 8; ++NumSpills)
1503  if (!SavedRegs.test(ARM::D8 + NumSpills))
1504  break;
1505 
1506  // Don't do this for just one d-register. It's not worth it.
1507  if (NumSpills < 2)
1508  return;
1509 
1510  // Spill the first NumSpills D-registers after realigning the stack.
1511  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1512 
1513  // A scratch register is required for the vst1 / vld1 instructions.
1514  SavedRegs.set(ARM::R4);
1515 }
1516 
1517 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
1518  BitVector &SavedRegs,
1519  RegScavenger *RS) const {
1520  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1521  // This tells PEI to spill the FP as if it is any other callee-save register
1522  // to take advantage the eliminateFrameIndex machinery. This also ensures it
1523  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1524  // to combine multiple loads / stores.
1525  bool CanEliminateFrame = true;
1526  bool CS1Spilled = false;
1527  bool LRSpilled = false;
1528  unsigned NumGPRSpills = 0;
1529  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1530  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1531  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1532  MF.getSubtarget().getRegisterInfo());
1533  const ARMBaseInstrInfo &TII =
1534  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1535  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1536  MachineFrameInfo *MFI = MF.getFrameInfo();
1537  MachineRegisterInfo &MRI = MF.getRegInfo();
1538  unsigned FramePtr = RegInfo->getFrameRegister(MF);
1539 
1540  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
1541  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
1542  // since it's not always possible to restore sp from fp in a single
1543  // instruction.
1544  // FIXME: It will be better just to find spare register here.
1545  if (AFI->isThumb2Function() &&
1546  (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1547  SavedRegs.set(ARM::R4);
1548 
1549  if (AFI->isThumb1OnlyFunction()) {
1550  // Spill LR if Thumb1 function uses variable length argument lists.
1551  if (AFI->getArgRegsSaveSize() > 0)
1552  SavedRegs.set(ARM::LR);
1553 
1554  // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
1555  // for sure what the stack size will be, but for this, an estimate is good
1556  // enough. If anything changes it, it'll be a spill, which implies
1557  // we've used all the registers and so R4 is already used, so not marking
1558  // it here will be OK.
1559  // FIXME: It will be better just to find spare register here.
1560  unsigned StackSize = MFI->estimateStackSize(MF);
1561  if (MFI->hasVarSizedObjects() || StackSize > 508)
1562  SavedRegs.set(ARM::R4);
1563  }
1564 
1565  // See if we can spill vector registers to aligned stack.
1566  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
1567 
1568  // Spill the BasePtr if it's used.
1569  if (RegInfo->hasBasePointer(MF))
1570  SavedRegs.set(RegInfo->getBaseRegister());
1571 
1572  // Don't spill FP if the frame can be eliminated. This is determined
1573  // by scanning the callee-save registers to see if any is modified.
1574  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1575  for (unsigned i = 0; CSRegs[i]; ++i) {
1576  unsigned Reg = CSRegs[i];
1577  bool Spilled = false;
1578  if (SavedRegs.test(Reg)) {
1579  Spilled = true;
1580  CanEliminateFrame = false;
1581  }
1582 
1583  if (!ARM::GPRRegClass.contains(Reg))
1584  continue;
1585 
1586  if (Spilled) {
1587  NumGPRSpills++;
1588 
1589  if (!STI.isTargetDarwin()) {
1590  if (Reg == ARM::LR)
1591  LRSpilled = true;
1592  CS1Spilled = true;
1593  continue;
1594  }
1595 
1596  // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
1597  switch (Reg) {
1598  case ARM::LR:
1599  LRSpilled = true;
1600  // Fallthrough
1601  case ARM::R0: case ARM::R1:
1602  case ARM::R2: case ARM::R3:
1603  case ARM::R4: case ARM::R5:
1604  case ARM::R6: case ARM::R7:
1605  CS1Spilled = true;
1606  break;
1607  default:
1608  break;
1609  }
1610  } else {
1611  if (!STI.isTargetDarwin()) {
1612  UnspilledCS1GPRs.push_back(Reg);
1613  continue;
1614  }
1615 
1616  switch (Reg) {
1617  case ARM::R0: case ARM::R1:
1618  case ARM::R2: case ARM::R3:
1619  case ARM::R4: case ARM::R5:
1620  case ARM::R6: case ARM::R7:
1621  case ARM::LR:
1622  UnspilledCS1GPRs.push_back(Reg);
1623  break;
1624  default:
1625  UnspilledCS2GPRs.push_back(Reg);
1626  break;
1627  }
1628  }
1629  }
1630 
1631  bool ForceLRSpill = false;
1632  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1633  unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
1634  // Force LR to be spilled if the Thumb function size is > 2048. This enables
1635  // the use of BL to implement a far jump. If it turns out that it's not
1636  // needed, the branch fix-up pass will undo it.
1637  if (FnSize >= (1 << 11)) {
1638  CanEliminateFrame = false;
1639  ForceLRSpill = true;
1640  }
1641  }
1642 
1643  // If any of the stack slot references may be out of range of an immediate
1644  // offset, make sure a register (or a spill slot) is available for the
1645  // register scavenger. Note that if we're indexing off the frame pointer, the
1646  // effective stack size is 4 bytes larger since the FP points to the stack
1647  // slot of the previous FP. Also, if we have variable sized objects in the
1648  // function, stack slot references will often be negative, and some of
1649  // our instructions are positive-offset only, so conservatively consider
1650  // that case to want a spill slot (or register) as well. Similarly, if
1651  // the function adjusts the stack pointer during execution and the
1652  // adjustments aren't already part of our stack size estimate, our offset
1653  // calculations may be off, so be conservative.
1654  // FIXME: We could add logic to be more precise about negative offsets
1655  // and which instructions will need a scratch register for them. Is it
1656  // worth the effort and added fragility?
1657  bool BigStack =
1658  (RS &&
1659  (MFI->estimateStackSize(MF) +
1660  ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
1661  estimateRSStackSizeLimit(MF, this)))
1662  || MFI->hasVarSizedObjects()
1663  || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
1664 
1665  bool ExtraCSSpill = false;
1666  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1667  AFI->setHasStackFrame(true);
1668 
1669  // If LR is not spilled, but at least one of R4, R5, R6, and R7 is,
1670  // spill LR as well so we can fold BX_RET into the register restore (LDM).
1671  if (!LRSpilled && CS1Spilled) {
1672  SavedRegs.set(ARM::LR);
1673  NumGPRSpills++;
1674  SmallVectorImpl<unsigned>::iterator LRPos;
1675  LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
1676  (unsigned)ARM::LR);
1677  if (LRPos != UnspilledCS1GPRs.end())
1678  UnspilledCS1GPRs.erase(LRPos);
1679 
1680  ForceLRSpill = false;
1681  ExtraCSSpill = true;
1682  }
1683 
1684  if (hasFP(MF)) {
1685  SavedRegs.set(FramePtr);
1686  auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
1687  FramePtr);
1688  if (FPPos != UnspilledCS1GPRs.end())
1689  UnspilledCS1GPRs.erase(FPPos);
1690  NumGPRSpills++;
1691  }
1692 
1693  // If stack and double are 8-byte aligned and we are spilling an odd number
1694  // of GPRs, spill one extra callee save GPR so we won't have to pad between
1695  // the integer and double callee save areas.
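 // For example, spilling only {r4, r5, lr} (12 bytes) would otherwise force 4
 // bytes of padding before the D-register area; spilling one more GPR makes
 // the GPR area 16 bytes and keeps the D-registers 8-byte aligned.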
1696  unsigned TargetAlign = getStackAlignment();
1697  if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
1698  if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
1699  for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
1700  unsigned Reg = UnspilledCS1GPRs[i];
1701  // Don't spill a high register if the function is Thumb.
1702  if (!AFI->isThumbFunction() ||
1703  isARMLowRegister(Reg) || Reg == ARM::LR) {
1704  SavedRegs.set(Reg);
1705  if (!MRI.isReserved(Reg))
1706  ExtraCSSpill = true;
1707  break;
1708  }
1709  }
1710  } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
1711  unsigned Reg = UnspilledCS2GPRs.front();
1712  SavedRegs.set(Reg);
1713  if (!MRI.isReserved(Reg))
1714  ExtraCSSpill = true;
1715  }
1716  }
1717 
1718  // Estimate if we might need to scavenge a register at some point in order
1719  // to materialize a stack offset. If so, either spill one additional
1720  // callee-saved register or reserve a special spill slot to facilitate
1721  // register scavenging. Thumb1 needs a spill slot for stack pointer
1722  // adjustments also, even when the frame itself is small.
1723  if (BigStack && !ExtraCSSpill) {
1724  // If any non-reserved CS register isn't spilled, just spill one or two
1725  // extra. That should take care of it!
1726  unsigned NumExtras = TargetAlign / 4;
1727  SmallVector<unsigned, 2> Extras;
1728  while (NumExtras && !UnspilledCS1GPRs.empty()) {
1729  unsigned Reg = UnspilledCS1GPRs.back();
1730  UnspilledCS1GPRs.pop_back();
1731  if (!MRI.isReserved(Reg) &&
1732  (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
1733  Reg == ARM::LR)) {
1734  Extras.push_back(Reg);
1735  NumExtras--;
1736  }
1737  }
1738  // For non-Thumb1 functions, also check for hi-reg CS registers
1739  if (!AFI->isThumb1OnlyFunction()) {
1740  while (NumExtras && !UnspilledCS2GPRs.empty()) {
1741  unsigned Reg = UnspilledCS2GPRs.back();
1742  UnspilledCS2GPRs.pop_back();
1743  if (!MRI.isReserved(Reg)) {
1744  Extras.push_back(Reg);
1745  NumExtras--;
1746  }
1747  }
1748  }
1749  if (Extras.size() && NumExtras == 0) {
1750  for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
1751  SavedRegs.set(Extras[i]);
1752  }
1753  } else if (!AFI->isThumb1OnlyFunction()) {
1754  // Note: Thumb1 functions spill to R12, not the stack. Reserve a slot
1755  // closest to SP or the frame pointer.
1756  const TargetRegisterClass *RC = &ARM::GPRRegClass;
1757  RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
1758  RC->getAlignment(),
1759  false));
1760  }
1761  }
1762  }
1763 
1764  if (ForceLRSpill) {
1765  SavedRegs.set(ARM::LR);
1766  AFI->setLRIsSpilledForFarJump(true);
1767  }
1768 }
1769 
1770 
1771 void ARMFrameLowering::
1772 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
1773  MachineBasicBlock::iterator I) const {
1774  const ARMBaseInstrInfo &TII =
1775  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1776  if (!hasReservedCallFrame(MF)) {
1777  // If we have alloca, convert as follows:
1778  // ADJCALLSTACKDOWN -> sub sp, sp, amount
1779  // ADJCALLSTACKUP   -> add sp, sp, amount
1780  MachineInstr *Old = I;
1781  DebugLoc dl = Old->getDebugLoc();
1782  unsigned Amount = Old->getOperand(0).getImm();
1783  if (Amount != 0) {
1784  // We need to keep the stack aligned properly. To do this, we round the
1785  // amount of space needed for the outgoing arguments up to the next
1786  // alignment boundary.
1787  unsigned Align = getStackAlignment();
1788  Amount = (Amount+Align-1)/Align*Align;
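 // e.g. if Align is 8, an Amount of 10 bytes is rounded up to 16.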
1789 
1790  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1791  assert(!AFI->isThumb1OnlyFunction() &&
1792  "This eliminateCallFramePseudoInstr does not support Thumb1!");
1793  bool isARM = !AFI->isThumbFunction();
1794 
1795  // Replace the pseudo instruction with a new instruction...
1796  unsigned Opc = Old->getOpcode();
1797  int PIdx = Old->findFirstPredOperandIdx();
1798  ARMCC::CondCodes Pred = (PIdx == -1)
1799  ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
1800  if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
1801  // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
1802  unsigned PredReg = Old->getOperand(2).getReg();
1803  emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
1804  Pred, PredReg);
1805  } else {
1806  // Note: PredReg is operand 3 for ADJCALLSTACKUP.
1807  unsigned PredReg = Old->getOperand(3).getReg();
1808  assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
1809  emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
1810  Pred, PredReg);
1811  }
1812  }
1813  }
1814  MBB.erase(I);
1815 }
1816 
1817 /// Get the minimum constant for ARM that is greater than or equal to the
1818 /// argument. In ARM, constants can have any value that can be produced by
1819 /// rotating an 8-bit value to the right by an even number of bits within a
1820 /// 32-bit word.
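/// For example, 257 (0x101) is not encodable, so alignToARMConstant(257)
/// returns 260 (0x104, i.e. 65 rotated right by 30 bits), the next value
/// that is.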
1821 static uint32_t alignToARMConstant(uint32_t Value) {
1822  unsigned Shifted = 0;
1823 
1824  if (Value == 0)
1825  return 0;
1826 
1827  while (!(Value & 0xC0000000)) {
1828  Value = Value << 2;
1829  Shifted += 2;
1830  }
1831 
1832  bool Carry = (Value & 0x00FFFFFF);
1833  Value = ((Value & 0xFF000000) >> 24) + Carry;
1834 
1835  if (Value & 0x0000100)
1836  Value = Value & 0x000001FC;
1837 
1838  if (Shifted > 24)
1839  Value = Value >> (Shifted - 24);
1840  else
1841  Value = Value << (24 - Shifted);
1842 
1843  return Value;
1844 }
1845 
1846 // The stack limit in the TCB is set to this many bytes above the actual
1847 // stack limit.
1848 static const uint64_t kSplitStackAvailable = 256;
1849 
1850 // Adjust the function prologue to enable split stacks. This currently only
1851 // supports android and linux.
1852 //
1853 // The ABI of the segmented stack prologue is somewhat arbitrary, but it
1854 // must be well defined in order to allow for consistent implementations of the
1855 // __morestack helper function. The ABI is also not a normal ABI in that it
1856 // doesn't follow the normal calling conventions because this allows the
1857 // prologue of each function to be optimized further.
1858 //
1859 // Currently, the ABI looks like (when calling __morestack)
1860 //
1861 // * r4 holds the minimum stack size requested for this function call
1862 // * r5 holds the stack size of the arguments to the function
1863 // * the beginning of the function is 3 instructions after the call to
1864 // __morestack
1865 //
1866 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
1867 // place the arguments on to the new stack, and the 3-instruction knowledge to
1868 // jump directly to the body of the function when working on the new stack.
1869 //
1870 // An old (and possibly no longer compatible) implementation of __morestack for
1871 // ARM can be found at [1].
1872 //
1873 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
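//
// The code below splits the prologue across five new basic blocks, laid out
// in front of the original entry block:
//
//   PrevStackMBB -> McrMBB -> GetMBB -> { AllocMBB, PostStackMBB } -> Prologue
//
// PrevStackMBB saves the scratch registers, McrMBB and GetMBB load the current
// stack limit and compare it with the space this frame needs, AllocMBB calls
// __morestack, and PostStackMBB restores the scratch registers before falling
// through to the original prologue.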
1874 void ARMFrameLowering::adjustForSegmentedStacks(
1875     MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
1876  unsigned Opcode;
1877  unsigned CFIIndex;
1878  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
1879  bool Thumb = ST->isThumb();
1880 
1881  // Sadly, this currently doesn't support varargs or platforms other than
1882  // android/linux. Note that thumb1/thumb2 are supported on android/linux.
1883  if (MF.getFunction()->isVarArg())
1884  report_fatal_error("Segmented stacks do not support vararg functions.");
1885  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
1886  report_fatal_error("Segmented stacks not supported on this platform.");
1887 
1888  assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented");
1889  MachineFrameInfo *MFI = MF.getFrameInfo();
1890  MachineModuleInfo &MMI = MF.getMMI();
1891  MCContext &Context = MMI.getContext();
1892  const MCRegisterInfo *MRI = Context.getRegisterInfo();
1893  const ARMBaseInstrInfo &TII =
1894  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1895  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
1896  DebugLoc DL;
1897 
1898  uint64_t StackSize = MFI->getStackSize();
1899 
1900  // Do not generate a prologue for functions with a stack of size zero
1901  if (StackSize == 0)
1902  return;
1903 
1904  // Use R4 and R5 as scratch registers.
1905  // We save R4 and R5 before use and restore them before leaving the function.
1906  unsigned ScratchReg0 = ARM::R4;
1907  unsigned ScratchReg1 = ARM::R5;
1908  uint64_t AlignedStackSize;
1909 
1910  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
1911  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
1912  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
1913  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
1914  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
1915 
1916  for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
1917  e = PrologueMBB.livein_end();
1918  i != e; ++i) {
1919  AllocMBB->addLiveIn(*i);
1920  GetMBB->addLiveIn(*i);
1921  McrMBB->addLiveIn(*i);
1922  PrevStackMBB->addLiveIn(*i);
1923  PostStackMBB->addLiveIn(*i);
1924  }
1925 
1926  MF.push_front(PostStackMBB);
1927  MF.push_front(AllocMBB);
1928  MF.push_front(GetMBB);
1929  MF.push_front(McrMBB);
1930  MF.push_front(PrevStackMBB);
1931 
1932  // The required stack size, rounded up to a value encodable as an ARM immediate.
1933  AlignedStackSize = alignToARMConstant(StackSize);
1934 
1935  // When the frame size is less than 256 we just compare the stack
1936  // boundary directly to the value of the stack pointer, per gcc.
1937  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
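 // (The limit stored in the TCB sits kSplitStackAvailable (256) bytes above
 // the real stack limit, so frames smaller than that fit in the existing
 // slack.)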
1938 
1939  // We will use two of the callee save registers as scratch registers so we
1940  // need to save those registers onto the stack.
1941  // We will use SR0 to hold the stack limit and SR1 to hold the stack size
1942  // requested and arguments for __morestack().
1943  // SR0: Scratch Register #0
1944  // SR1: Scratch Register #1
1945  // push {SR0, SR1}
1946  if (Thumb) {
1947  AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)))
1948  .addReg(ScratchReg0).addReg(ScratchReg1);
1949  } else {
1950  AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
1951  .addReg(ARM::SP, RegState::Define).addReg(ARM::SP))
1952  .addReg(ScratchReg0).addReg(ScratchReg1);
1953  }
1954 
1955  // Emit the relevant DWARF information about the change in stack pointer as
1956  // well as where to find both r4 and r5 (the callee-save registers)
1957  CFIIndex =
1958      MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
1959  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1960  .addCFIIndex(CFIIndex);
1961  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
1962      nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
1963  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1964  .addCFIIndex(CFIIndex);
1965  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
1966      nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
1967  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1968  .addCFIIndex(CFIIndex);
1969 
1970  // mov SR1, sp
1971  if (Thumb) {
1972  AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
1973  .addReg(ARM::SP));
1974  } else if (CompareStackPointer) {
1975  AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
1976  .addReg(ARM::SP)).addReg(0);
1977  }
1978 
1979  // sub SR1, sp, #StackSize
1980  if (!CompareStackPointer && Thumb) {
1981    AddDefaultPred(
1982        AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
1983  .addReg(ScratchReg1).addImm(AlignedStackSize));
1984  } else if (!CompareStackPointer) {
1985  AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
1986  .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
1987  }
1988 
1989  if (Thumb && ST->isThumb1Only()) {
1990  unsigned PCLabelId = ARMFI->createPICLabelUId();
1991    ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
1992        MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
1993    MachineConstantPool *MCP = MF.getConstantPool();
1994  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());
1995 
1996  // ldr SR0, [pc, offset(STACK_LIMIT)]
1997  AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
1998  .addConstantPoolIndex(CPI));
1999 
2000  // ldr SR0, [SR0]
2001  AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
2002  .addReg(ScratchReg0).addImm(0));
2003  } else {
2004  // Get TLS base address from the coprocessor
2005  // mrc p15, #0, SR0, c13, c0, #3
2006  AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
2007  .addImm(15)
2008  .addImm(0)
2009  .addImm(13)
2010  .addImm(0)
2011  .addImm(3));
2012 
2013    // Use the last tls slot on android and a private field of the TCB on linux.
2014  assert(ST->isTargetAndroid() || ST->isTargetLinux());
2015  unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
2016 
2017  // Get the stack limit from the right offset
2018  // ldr SR0, [sr0, #4 * TlsOffset]
2019  AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
2020  .addReg(ScratchReg0).addImm(4 * TlsOffset));
2021  }
2022 
2023  // Compare stack limit with stack size requested.
2024  // cmp SR0, SR1
2025  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
2026  AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode))
2027  .addReg(ScratchReg0)
2028  .addReg(ScratchReg1));
2029 
2030  // This jump is taken if StackLimit < SP - stack required.
2031  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
2032  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
2033  .addImm(ARMCC::LO)
2034  .addReg(ARM::CPSR);
2035 
2036 
2037  // Calling __morestack(StackSize, Size of stack arguments).
2038  // __morestack knows that the stack size requested is in SR0(r4)
2039  // and the size of the stack arguments is in SR1(r5).
2040 
2041  // Pass the first argument to __morestack in Scratch Register #0:
2042  // the amount of stack required.
2043  if (Thumb) {
2044  AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8),
2045  ScratchReg0)).addImm(AlignedStackSize));
2046  } else {
2047  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
2048  .addImm(AlignedStackSize)).addReg(0);
2049  }
2050  // Pass the second argument to __morestack in Scratch Register #1:
2051  // the amount of stack consumed to save the function arguments.
2052  if (Thumb) {
2053    AddDefaultPred(
2054        AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
2055            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())));
2056  } else {
2057  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
2058  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())))
2059  .addReg(0);
2060  }
2061 
2062  // push {lr} - Save return address of this function.
2063  if (Thumb) {
2064  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)))
2065  .addReg(ARM::LR);
2066  } else {
2067  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
2068  .addReg(ARM::SP, RegState::Define)
2069  .addReg(ARM::SP))
2070  .addReg(ARM::LR);
2071  }
2072 
2073  // Emit the DWARF info about the change in stack as well as where to find the
2074  // previous link register
2075  CFIIndex =
2076      MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
2077  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2078  .addCFIIndex(CFIIndex);
2079  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
2080      nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
2081  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2082  .addCFIIndex(CFIIndex);
2083 
2084  // Call __morestack().
2085  if (Thumb) {
2086  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL)))
2087  .addExternalSymbol("__morestack");
2088  } else {
2089  BuildMI(AllocMBB, DL, TII.get(ARM::BL))
2090  .addExternalSymbol("__morestack");
2091  }
2092 
2093  // pop {lr} - Restore return address of this original function.
2094  if (Thumb) {
2095  if (ST->isThumb1Only()) {
2096  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
2097  .addReg(ScratchReg0);
2098  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
2099  .addReg(ScratchReg0));
2100  } else {
2101  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
2102  .addReg(ARM::LR, RegState::Define)
2103  .addReg(ARM::SP, RegState::Define)
2104  .addReg(ARM::SP)
2105  .addImm(4));
2106  }
2107  } else {
2108  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2109  .addReg(ARM::SP, RegState::Define)
2110  .addReg(ARM::SP))
2111  .addReg(ARM::LR);
2112  }
2113 
2114  // Restore SR0 and SR1 in case __morestack() was called.
2115  // __morestack() will skip the PostStackMBB block, so we need to restore the
2116  // scratch registers here.
2117  // pop {SR0, SR1}
2118  if (Thumb) {
2119  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
2120  .addReg(ScratchReg0)
2121  .addReg(ScratchReg1);
2122  } else {
2123  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2124  .addReg(ARM::SP, RegState::Define)
2125  .addReg(ARM::SP))
2126  .addReg(ScratchReg0)
2127  .addReg(ScratchReg1);
2128  }
2129 
2130  // Update the CFA offset now that we've popped
2131  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2132  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2133  .addCFIIndex(CFIIndex);
2134 
2135  // bx lr - Return from this function.
2136  Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
2137  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode)));
2138 
2139  // Restore SR0 and SR1 in case __morestack() was not called.
2140  // pop {SR0, SR1}
2141  if (Thumb) {
2142  AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)))
2143  .addReg(ScratchReg0)
2144  .addReg(ScratchReg1);
2145  } else {
2146  AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
2147  .addReg(ARM::SP, RegState::Define)
2148  .addReg(ARM::SP))
2149  .addReg(ScratchReg0)
2150  .addReg(ScratchReg1);
2151  }
2152 
2153  // Update the CFA offset now that we've popped
2154  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2155  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2156  .addCFIIndex(CFIIndex);
2157 
2158  // Tell debuggers that r4 and r5 are now the same as they were in the
2159  // previous function, that they're the "Same Value".
2160  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
2161      nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
2162  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2163  .addCFIIndex(CFIIndex);
2164  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
2165      nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
2166  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2167  .addCFIIndex(CFIIndex);
2168 
2169  // Organizing MBB lists
2170  PostStackMBB->addSuccessor(&PrologueMBB);
2171 
2172  AllocMBB->addSuccessor(PostStackMBB);
2173 
2174  GetMBB->addSuccessor(PostStackMBB);
2175  GetMBB->addSuccessor(AllocMBB);
2176 
2177  McrMBB->addSuccessor(GetMBB);
2178 
2179  PrevStackMBB->addSuccessor(McrMBB);
2180 
2181 #ifdef XDEBUG
2182  MF.verify();
2183 #endif
2184 }