LLVM 8.0.0svn
ARMFrameLowering.cpp
1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the ARM implementation of TargetFrameLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMFrameLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMConstantPoolValue.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "MCTargetDesc/ARMBaseInfo.h"
22 #include "Utils/ARMBaseInfo.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/MC/MCContext.h"
46 #include "llvm/MC/MCDwarf.h"
47 #include "llvm/MC/MCInstrDesc.h"
48 #include "llvm/MC/MCRegisterInfo.h"
49 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/ErrorHandling.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Target/TargetMachine.h"
57 #include "llvm/Target/TargetOptions.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstddef>
61 #include <cstdint>
62 #include <iterator>
63 #include <utility>
64 #include <vector>
65 
66 #define DEBUG_TYPE "arm-frame-lowering"
67 
68 using namespace llvm;
69 
70 static cl::opt<bool>
71 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
72  cl::desc("Align ARM NEON spills in prolog and epilog"));
73 
74 static MachineBasicBlock::iterator
75 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
76  unsigned NumAlignedDPRCS2Regs);
77 
78 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
79  : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
80  STI(sti) {}
81 
82 bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
83  // iOS always has a FP for backtracking, force other targets to keep their FP
84  // when doing FastISel. The emitted code is currently superior, and in cases
85  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
86  return TargetFrameLowering::noFramePointerElim(MF) ||
87  MF.getSubtarget<ARMSubtarget>().useFastISel();
88 }
89 
90 /// Returns true if the target can safely skip saving callee-saved registers
91 /// for noreturn nounwind functions.
92 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
93  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
94  MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
95  !MF.getFunction().hasFnAttribute(Attribute::UWTable));
96 
97  // Frame pointer and link register are not treated as normal CSR, thus we
98  // can always skip CSR saves for nonreturning functions.
99  return true;
100 }
101 
102 /// hasFP - Return true if the specified function should have a dedicated frame
103 /// pointer register. This is true if the function has variable sized allocas
104 /// or if frame pointer elimination is disabled.
105 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
106  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
107  const MachineFrameInfo &MFI = MF.getFrameInfo();
108 
109  // ABI-required frame pointer.
110  if (MF.getTarget().Options.DisableFramePointerElim(MF))
111  return true;
112 
113  // Frame pointer required for use within this function.
114  return (RegInfo->needsStackRealignment(MF) ||
115  MFI.hasVarSizedObjects() ||
116  MFI.isFrameAddressTaken());
117 }
118 
119 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
120 /// not required, we reserve argument space for call sites in the function
121 /// immediately on entry to the current function. This eliminates the need for
122 /// add/sub sp brackets around call sites. Returns true if the call frame is
123 /// included as part of the stack frame.
124 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
125  const MachineFrameInfo &MFI = MF.getFrameInfo();
126  unsigned CFSize = MFI.getMaxCallFrameSize();
127  // It's not always a good idea to include the call frame as part of the
128  // stack frame. ARM (especially Thumb) has only a small immediate offset
129  // range for addressing the stack frame, so a large call frame can cause
130  // poor codegen and may even make it impossible to scavenge a register.
131  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
132  return false;
133 
134  return !MFI.hasVarSizedObjects();
135 }
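// Worked example (illustrative, not taken from the source): imm12 encodes
// offsets up to (1 << 12) - 1 = 4095 bytes, so the check above reserves the
// call frame only while it stays under half of that:
//   static_assert(((1 << 12) - 1) / 2 == 2047, "half of imm12");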
136 
137 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
138 /// call frame pseudos can be simplified. Unlike most targets, having a FP
139 /// is not sufficient here since we still may reference some objects via SP
140 /// even when FP is available in Thumb2 mode.
141 bool
142 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
143  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
144 }
145 
146 static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
147  const MCPhysReg *CSRegs) {
148  // Integer spill area is handled with "pop".
149  if (isPopOpcode(MI.getOpcode())) {
150  // The first two operands are predicates. The last two are
151  // imp-def and imp-use of SP. Check everything in between.
152  for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
153  if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
154  return false;
155  return true;
156  }
157  if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
158  MI.getOpcode() == ARM::LDR_POST_REG ||
159  MI.getOpcode() == ARM::t2LDR_POST) &&
160  isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
161  MI.getOperand(1).getReg() == ARM::SP)
162  return true;
163 
164  return false;
165 }
166 
167 static void emitRegPlusImmediate(
168  bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
169  const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
170  unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
171  ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
172  if (isARM)
173  emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
174  Pred, PredReg, TII, MIFlags);
175  else
176  emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
177  Pred, PredReg, TII, MIFlags);
178 }
179 
180 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
181  MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
182  const ARMBaseInstrInfo &TII, int NumBytes,
183  unsigned MIFlags = MachineInstr::NoFlags,
184  ARMCC::CondCodes Pred = ARMCC::AL,
185  unsigned PredReg = 0) {
186  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
187  MIFlags, Pred, PredReg);
188 }
189 
190 static int sizeOfSPAdjustment(const MachineInstr &MI) {
191  int RegSize;
192  switch (MI.getOpcode()) {
193  case ARM::VSTMDDB_UPD:
194  RegSize = 8;
195  break;
196  case ARM::STMDB_UPD:
197  case ARM::t2STMDB_UPD:
198  RegSize = 4;
199  break;
200  case ARM::t2STR_PRE:
201  case ARM::STR_PRE_IMM:
202  return 4;
203  default:
204  llvm_unreachable("Unknown push or pop like instruction");
205  }
206 
207  int count = 0;
208  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
209  // pred) so the list starts at 4.
210  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
211  count += RegSize;
212  return count;
213 }
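// Worked example (illustrative): a Thumb2  t2STMDB_UPD sp!, {r4, r5, r6, r7, lr}
// has 4 fixed operands (sp, sp, pred, pred-reg) plus 5 register operands, so
// the loop above counts 5 * RegSize = 5 * 4 = 20 bytes of SP adjustment.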
214 
215 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
216  size_t StackSizeInBytes) {
217  const MachineFrameInfo &MFI = MF.getFrameInfo();
218  const Function &F = MF.getFunction();
219  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
220  if (F.hasFnAttribute("stack-probe-size"))
221  F.getFnAttribute("stack-probe-size")
222  .getValueAsString()
223  .getAsInteger(0, StackProbeSize);
224  return (StackSizeInBytes >= StackProbeSize) &&
225  !F.hasFnAttribute("no-stack-arg-probe");
226 }
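// Example (illustrative): the 4096-byte default can be tuned per function in
// IR, which the code above reads via getFnAttribute("stack-probe-size"), e.g.
//   define void @f() "stack-probe-size"="8192" { ... }
// while the "no-stack-arg-probe" attribute suppresses the probe entirely.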
227 
228 namespace {
229 
230 struct StackAdjustingInsts {
231  struct InstInfo {
232  MachineBasicBlock::iterator I;
233  unsigned SPAdjust;
234  bool BeforeFPSet;
235  };
236 
237  SmallVector<InstInfo, 4> Insts;
238 
239  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
240  bool BeforeFPSet = false) {
241  InstInfo Info = {I, SPAdjust, BeforeFPSet};
242  Insts.push_back(Info);
243  }
244 
245  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
246  auto Info =
247  llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
248  assert(Info != Insts.end() && "invalid sp adjusting instruction");
249  Info->SPAdjust += ExtraBytes;
250  }
251 
252  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
253  const ARMBaseInstrInfo &TII, bool HasFP) {
254  MachineFunction &MF = *MBB.getParent();
255  unsigned CFAOffset = 0;
256  for (auto &Info : Insts) {
257  if (HasFP && !Info.BeforeFPSet)
258  return;
259 
260  CFAOffset -= Info.SPAdjust;
261  unsigned CFIIndex = MF.addFrameInst(
262  MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
263  BuildMI(MBB, std::next(Info.I), dl,
264  TII.get(TargetOpcode::CFI_INSTRUCTION))
265  .addCFIIndex(CFIIndex)
266  .setMIFlags(MachineInstr::FrameSetup);
267  }
268  }
269 };
270 
271 } // end anonymous namespace
272 
273 /// Emit an instruction sequence that will align the address in
274 /// register Reg by zero-ing out the lower bits. For versions of the
275 /// architecture that support Neon, this must be done in a single
276 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
277 /// single instruction. That function only gets called when optimizing
278 /// spilling of D registers on a core with the Neon instruction set
279 /// present.
280 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
281  const TargetInstrInfo &TII,
282  MachineBasicBlock &MBB,
283  MachineBasicBlock::iterator MBBI,
284  const DebugLoc &DL, const unsigned Reg,
285  const unsigned Alignment,
286  const bool MustBeSingleInstruction) {
287  const ARMSubtarget &AST =
288  static_cast<const ARMSubtarget &>(MF.getSubtarget());
289  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
290  const unsigned AlignMask = Alignment - 1;
291  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
292  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
293  if (!AFI->isThumbFunction()) {
294  // if the BFC instruction is available, use that to zero the lower
295  // bits:
296  // bfc Reg, #0, log2(Alignment)
297  // otherwise use BIC, if the mask to zero the required number of bits
298  // can be encoded in the bic immediate field
299  // bic Reg, Reg, Alignment-1
300  // otherwise, emit
301  // lsr Reg, Reg, log2(Alignment)
302  // lsl Reg, Reg, log2(Alignment)
303  if (CanUseBFC) {
304  BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
305  .addReg(Reg, RegState::Kill)
306  .addImm(~AlignMask)
307  .add(predOps(ARMCC::AL));
308  } else if (AlignMask <= 255) {
309  BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
310  .addReg(Reg, RegState::Kill)
311  .addImm(AlignMask)
312  .add(predOps(ARMCC::AL))
313  .add(condCodeOp());
314  } else {
315  assert(!MustBeSingleInstruction &&
316  "Shouldn't call emitAligningInstructions demanding a single "
317  "instruction to be emitted for large stack alignment for a target "
318  "without BFC.");
319  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
320  .addReg(Reg, RegState::Kill)
321  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
322  .add(predOps(ARMCC::AL))
323  .add(condCodeOp());
324  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
325  .addReg(Reg, RegState::Kill)
326  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
327  .add(predOps(ARMCC::AL))
328  .add(condCodeOp());
329  }
330  } else {
331  // Since this is only reached for Thumb-2 targets, the BFC instruction
332  // should always be available.
333  assert(CanUseBFC);
334  BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
335  .addReg(Reg, RegState::Kill)
336  .addImm(~AlignMask)
337  .add(predOps(ARMCC::AL));
338  }
339 }
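// Worked example (illustrative, assuming 16-byte alignment): Alignment = 16
// gives AlignMask = 15 and NrBitsToZero = 4, so the function emits one of
//   bfc r4, #0, #4                      ; v6T2/v7 ARM and Thumb2
//   bic r4, r4, #15                     ; older ARM, mask fits the BIC imm
//   lsr r4, r4, #4 ; lsl r4, r4, #4     ; fallback pair, not single-instruction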
340 
341 /// We need the offset of the frame pointer relative to other MachineFrameInfo
342 /// offsets which are encoded relative to SP at function begin.
343 /// See also emitPrologue() for how the FP is set up.
344 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
345 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
346 /// this to produce a conservative estimate that we check in an assert() later.
347 static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
348  // This is a conservative estimation: Assume the frame pointer being r7 and
349  // pc("r15") up to r8 getting spilled before (= 8 registers).
350  return -AFI.getArgRegsSaveSize() - (8 * 4);
351 }
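// Example (illustrative): with 16 bytes of varargs saved the bound is
// -16 - 8 * 4 = -48; emitPrologue later asserts that the real FP spill offset
// never falls below this conservative estimate.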
352 
353 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
354  MachineBasicBlock &MBB) const {
355  MachineBasicBlock::iterator MBBI = MBB.begin();
356  MachineFrameInfo &MFI = MF.getFrameInfo();
357  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
358  MachineModuleInfo &MMI = MF.getMMI();
359  MCContext &Context = MMI.getContext();
360  const TargetMachine &TM = MF.getTarget();
361  const MCRegisterInfo *MRI = Context.getRegisterInfo();
362  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
363  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
364  assert(!AFI->isThumb1OnlyFunction() &&
365  "This emitPrologue does not support Thumb1!");
366  bool isARM = !AFI->isThumbFunction();
367  unsigned Align = STI.getFrameLowering()->getStackAlignment();
368  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
369  unsigned NumBytes = MFI.getStackSize();
370  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
371 
372  // Debug location must be unknown since the first debug location is used
373  // to determine the end of the prologue.
374  DebugLoc dl;
375 
376  unsigned FramePtr = RegInfo->getFrameRegister(MF);
377 
378  // Determine the sizes of each callee-save spill areas and record which frame
379  // belongs to which callee-save spill areas.
380  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
381  int FramePtrSpillFI = 0;
382  int D8SpillFI = 0;
383 
384  // All calls are tail calls in GHC calling conv, and functions have no
385  // prologue/epilogue.
386  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
387  return;
388 
389  StackAdjustingInsts DefCFAOffsetCandidates;
390  bool HasFP = hasFP(MF);
391 
392  // Allocate the vararg register save area.
393  if (ArgRegsSaveSize) {
394  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
395  MachineInstr::FrameSetup);
396  DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
397  }
398 
399  if (!AFI->hasStackFrame() &&
400  (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
401  if (NumBytes - ArgRegsSaveSize != 0) {
402  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
403  MachineInstr::FrameSetup);
404  DefCFAOffsetCandidates.addInst(std::prev(MBBI),
405  NumBytes - ArgRegsSaveSize, true);
406  }
407  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
408  return;
409  }
410 
411  // Determine spill area sizes.
412  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
413  unsigned Reg = CSI[i].getReg();
414  int FI = CSI[i].getFrameIdx();
415  switch (Reg) {
416  case ARM::R8:
417  case ARM::R9:
418  case ARM::R10:
419  case ARM::R11:
420  case ARM::R12:
421  if (STI.splitFramePushPop(MF)) {
422  GPRCS2Size += 4;
423  break;
424  }
425  LLVM_FALLTHROUGH;
426  case ARM::R0:
427  case ARM::R1:
428  case ARM::R2:
429  case ARM::R3:
430  case ARM::R4:
431  case ARM::R5:
432  case ARM::R6:
433  case ARM::R7:
434  case ARM::LR:
435  if (Reg == FramePtr)
436  FramePtrSpillFI = FI;
437  GPRCS1Size += 4;
438  break;
439  default:
440  // This is a DPR. Exclude the aligned DPRCS2 spills.
441  if (Reg == ARM::D8)
442  D8SpillFI = FI;
443  if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
444  DPRCSSize += 8;
445  }
446  }
447 
448  // Move past area 1.
449  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
450  if (GPRCS1Size > 0) {
451  GPRCS1Push = LastPush = MBBI++;
452  DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
453  }
454 
455  // Determine starting offsets of spill areas.
456  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
457  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
458  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
459  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
460  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
461  int FramePtrOffsetInPush = 0;
462  if (HasFP) {
463  int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
464  assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset &&
465  "Max FP estimation is wrong");
466  FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
467  AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
468  NumBytes);
469  }
470  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
471  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
472  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
473 
474  // Move past area 2.
475  if (GPRCS2Size > 0) {
476  GPRCS2Push = LastPush = MBBI++;
477  DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
478  }
479 
480  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
481  // .cfi_offset operations will reflect that.
482  if (DPRGapSize) {
483  assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
484  if (LastPush != MBB.end() &&
485  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
486  DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
487  else {
488  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
489  MachineInstr::FrameSetup);
490  DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
491  }
492  }
493 
494  // Move past area 3.
495  if (DPRCSSize > 0) {
496  // Since vpush register list cannot have gaps, there may be multiple vpush
497  // instructions in the prologue.
498  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
499  DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
500  LastPush = MBBI++;
501  }
502  }
503 
504  // Move past the aligned DPRCS2 area.
505  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
506  MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
507  // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
508  // leaves the stack pointer pointing to the DPRCS2 area.
509  //
510  // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
511  NumBytes += MFI.getObjectOffset(D8SpillFI);
512  } else
513  NumBytes = DPRCSOffset;
514 
515  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
516  uint32_t NumWords = NumBytes >> 2;
517 
518  if (NumWords < 65536)
519  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
520  .addImm(NumWords)
521  .setMIFlags(MachineInstr::FrameSetup)
522  .add(predOps(ARMCC::AL));
523  else
524  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
525  .addImm(NumWords)
526  .setMIFlags(MachineInstr::FrameSetup);
527 
528  switch (TM.getCodeModel()) {
529  case CodeModel::Tiny:
530  llvm_unreachable("Tiny code model not available on ARM.");
531  case CodeModel::Small:
532  case CodeModel::Medium:
533  case CodeModel::Kernel:
534  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
535  .add(predOps(ARMCC::AL))
536  .addExternalSymbol("__chkstk")
537  .addReg(ARM::R4, RegState::Implicit)
538  .setMIFlags(MachineInstr::FrameSetup);
539  break;
540  case CodeModel::Large:
541  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
542  .addExternalSymbol("__chkstk")
543  .setMIFlags(MachineInstr::FrameSetup);
544 
545  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
546  .add(predOps(ARMCC::AL))
547  .addReg(ARM::R12, RegState::Kill)
548  .addReg(ARM::R4, RegState::Implicit)
549  .setMIFlags(MachineInstr::FrameSetup);
550  break;
551  }
552 
553  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
554  .addReg(ARM::SP, RegState::Kill)
555  .addReg(ARM::R4, RegState::Kill)
556  .setMIFlags(MachineInstr::FrameSetup)
557  .add(predOps(ARMCC::AL))
558  .add(condCodeOp());
559  NumBytes = 0;
560  }
561 
562  if (NumBytes) {
563  // Adjust SP after all the callee-save spills.
564  if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
565  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
566  DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
567  else {
568  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
569  MachineInstr::FrameSetup);
570  DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
571  }
572 
573  if (HasFP && isARM)
574  // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
575  // Note it's not safe to do this in Thumb2 mode because it would have
576  // taken two instructions:
577  // mov sp, r7
578  // sub sp, #24
579  // If an interrupt is taken between the two instructions, then sp is in
580  // an inconsistent state (pointing to the middle of callee-saved area).
581  // The interrupt handler can end up clobbering the registers.
582  AFI->setShouldRestoreSPFromFP(true);
583  }
584 
585  // Set FP to point to the stack slot that contains the previous FP.
586  // For iOS, FP is R7, which has now been stored in spill area 1.
587  // Otherwise, if this is not iOS, all the callee-saved registers go
588  // into spill area 1, including the FP in R11. In either case, it
589  // is in area one and the adjustment needs to take place just after
590  // that push.
591  if (HasFP) {
592  MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
593  unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
594  emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
595  dl, TII, FramePtr, ARM::SP,
596  PushSize + FramePtrOffsetInPush,
597  MachineInstr::FrameSetup);
598  if (FramePtrOffsetInPush + PushSize != 0) {
599  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
600  nullptr, MRI->getDwarfRegNum(FramePtr, true),
601  -(ArgRegsSaveSize - FramePtrOffsetInPush)));
602  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
603  .addCFIIndex(CFIIndex)
604  .setMIFlags(MachineInstr::FrameSetup);
605  } else {
606  unsigned CFIIndex =
607  MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
608  nullptr, MRI->getDwarfRegNum(FramePtr, true)));
609  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
610  .addCFIIndex(CFIIndex)
611  .setMIFlags(MachineInstr::FrameSetup);
612  }
613  }
614 
615  // Now that the prologue's actual instructions are finalised, we can insert
616  // the necessary DWARF cf instructions to describe the situation. Start by
617  // recording where each register ended up:
618  if (GPRCS1Size > 0) {
619  MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
620  int CFIIndex;
621  for (const auto &Entry : CSI) {
622  unsigned Reg = Entry.getReg();
623  int FI = Entry.getFrameIdx();
624  switch (Reg) {
625  case ARM::R8:
626  case ARM::R9:
627  case ARM::R10:
628  case ARM::R11:
629  case ARM::R12:
630  if (STI.splitFramePushPop(MF))
631  break;
632  LLVM_FALLTHROUGH;
633  case ARM::R0:
634  case ARM::R1:
635  case ARM::R2:
636  case ARM::R3:
637  case ARM::R4:
638  case ARM::R5:
639  case ARM::R6:
640  case ARM::R7:
641  case ARM::LR:
642  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
643  nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
644  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
645  .addCFIIndex(CFIIndex)
646  .setMIFlags(MachineInstr::FrameSetup);
647  break;
648  }
649  }
650  }
651 
652  if (GPRCS2Size > 0) {
653  MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
654  for (const auto &Entry : CSI) {
655  unsigned Reg = Entry.getReg();
656  int FI = Entry.getFrameIdx();
657  switch (Reg) {
658  case ARM::R8:
659  case ARM::R9:
660  case ARM::R10:
661  case ARM::R11:
662  case ARM::R12:
663  if (STI.splitFramePushPop(MF)) {
664  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
665  unsigned Offset = MFI.getObjectOffset(FI);
666  unsigned CFIIndex = MF.addFrameInst(
667  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
668  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
669  .addCFIIndex(CFIIndex)
670  .setMIFlags(MachineInstr::FrameSetup);
671  }
672  break;
673  }
674  }
675  }
676 
677  if (DPRCSSize > 0) {
678  // Since vpush register list cannot have gaps, there may be multiple vpush
679  // instructions in the prologue.
680  MachineBasicBlock::iterator Pos = std::next(LastPush);
681  for (const auto &Entry : CSI) {
682  unsigned Reg = Entry.getReg();
683  int FI = Entry.getFrameIdx();
684  if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
685  (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
686  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
687  unsigned Offset = MFI.getObjectOffset(FI);
688  unsigned CFIIndex = MF.addFrameInst(
689  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
690  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
691  .addCFIIndex(CFIIndex)
692  .setMIFlags(MachineInstr::FrameSetup);
693  }
694  }
695  }
696 
697  // Now we can emit descriptions of where the canonical frame address was
698  // throughout the process. If we have a frame pointer, it takes over the job
699  // half-way through, so only the first few .cfi_def_cfa_offset instructions
700  // actually get emitted.
701  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
702 
703  if (STI.isTargetELF() && hasFP(MF))
704  MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
705  AFI->getFramePtrSpillOffset());
706 
707  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
708  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
709  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
710  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
711 
712  // If we need dynamic stack realignment, do it here. Be paranoid and make
713  // sure if we also have VLAs, we have a base pointer for frame access.
714  // If aligned NEON registers were spilled, the stack has already been
715  // realigned.
716  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
717  unsigned MaxAlign = MFI.getMaxAlignment();
718  assert(!AFI->isThumb1OnlyFunction());
719  if (!AFI->isThumbFunction()) {
720  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
721  false);
722  } else {
723  // We cannot use sp as source/dest register here, thus we're using r4 to
724  // perform the calculations. We're emitting the following sequence:
725  // mov r4, sp
726  // -- use emitAligningInstructions to produce best sequence to zero
727  // -- out lower bits in r4
728  // mov sp, r4
729  // FIXME: It will be better just to find spare register here.
730  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
731  .addReg(ARM::SP, RegState::Kill)
732  .add(predOps(ARMCC::AL));
733  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
734  false);
735  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
736  .addReg(ARM::R4, RegState::Kill)
737  .add(predOps(ARMCC::AL));
738  }
739 
740  AFI->setShouldRestoreSPFromFP(true);
741  }
742 
743  // If we need a base pointer, set it up here. It's whatever the value
744  // of the stack pointer is at this point. Any variable size objects
745  // will be allocated after this, so we can still use the base pointer
746  // to reference locals.
747  // FIXME: Clarify FrameSetup flags here.
748  if (RegInfo->hasBasePointer(MF)) {
749  if (isARM)
750  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
751  .addReg(ARM::SP)
752  .add(predOps(ARMCC::AL))
753  .add(condCodeOp());
754  else
755  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
756  .addReg(ARM::SP)
757  .add(predOps(ARMCC::AL));
758  }
759 
760  // If the frame has variable sized objects then the epilogue must restore
761  // the sp from fp. We can assume there's an FP here since hasFP already
762  // checks for hasVarSizedObjects.
763  if (MFI.hasVarSizedObjects())
764  AFI->setShouldRestoreSPFromFP(true);
765 }
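// Rough shape of the prologue built above for a small Thumb2 function that
// needs a frame pointer (illustrative sketch; the register list and
// immediates depend on the function):
//   push  {r4, r7, lr}        ; GPR callee-save area 1
//   add   r7, sp, #4          ; point FP at the saved-FP slot
//   sub   sp, sp, #NumBytes   ; local area (often folded into the push)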
766 
767 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
768  MachineBasicBlock &MBB) const {
769  MachineFrameInfo &MFI = MF.getFrameInfo();
770  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
771  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
772  const ARMBaseInstrInfo &TII =
773  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
774  assert(!AFI->isThumb1OnlyFunction() &&
775  "This emitEpilogue does not support Thumb1!");
776  bool isARM = !AFI->isThumbFunction();
777 
778  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
779  int NumBytes = (int)MFI.getStackSize();
780  unsigned FramePtr = RegInfo->getFrameRegister(MF);
781 
782  // All calls are tail calls in GHC calling conv, and functions have no
783  // prologue/epilogue.
784  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
785  return;
786 
787  // First put ourselves on the first (from top) terminator instruction.
788  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
789  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
790 
791  if (!AFI->hasStackFrame()) {
792  if (NumBytes - ArgRegsSaveSize != 0)
793  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
794  } else {
795  // Unwind MBBI to point to first LDR / VLDRD.
796  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
797  if (MBBI != MBB.begin()) {
798  do {
799  --MBBI;
800  } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
801  if (!isCSRestore(*MBBI, TII, CSRegs))
802  ++MBBI;
803  }
804 
805  // Move SP to start of FP callee save spill area.
806  NumBytes -= (ArgRegsSaveSize +
807  AFI->getGPRCalleeSavedArea1Size() +
808  AFI->getGPRCalleeSavedArea2Size() +
809  AFI->getDPRCalleeSavedGapSize() +
810  AFI->getDPRCalleeSavedAreaSize());
811 
812  // Reset SP based on frame pointer only if the stack frame extends beyond
813  // frame pointer stack slot or target is ELF and the function has FP.
814  if (AFI->shouldRestoreSPFromFP()) {
815  NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
816  if (NumBytes) {
817  if (isARM)
818  emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
819  ARMCC::AL, 0, TII);
820  else {
821  // It's not possible to restore SP from FP in a single instruction.
822  // For iOS, this looks like:
823  // mov sp, r7
824  // sub sp, #24
825  // This is bad, if an interrupt is taken after the mov, sp is in an
826  // inconsistent state.
827  // Use the first callee-saved register as a scratch register.
828  assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
829  "No scratch register to restore SP from FP!");
830  emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
831  ARMCC::AL, 0, TII);
832  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
833  .addReg(ARM::R4)
834  .add(predOps(ARMCC::AL));
835  }
836  } else {
837  // Thumb2 or ARM.
838  if (isARM)
839  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
840  .addReg(FramePtr)
841  .add(predOps(ARMCC::AL))
842  .add(condCodeOp());
843  else
844  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
845  .addReg(FramePtr)
846  .add(predOps(ARMCC::AL));
847  }
848  } else if (NumBytes &&
849  !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
850  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
851 
852  // Increment past our save areas.
853  if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
854  MBBI++;
855  // Since vpop register list cannot have gaps, there may be multiple vpop
856  // instructions in the epilogue.
857  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
858  MBBI++;
859  }
860  if (AFI->getDPRCalleeSavedGapSize()) {
861  assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
862  "unexpected DPR alignment gap");
863  emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
864  }
865 
866  if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
867  if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
868  }
869 
870  if (ArgRegsSaveSize)
871  emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
872 }
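// Matching epilogue sketch (illustrative): the save areas are unwound in
// reverse order, with the return folded into the final pop when possible:
//   add   sp, sp, #NumBytes   ; or "sub sp, r7, #off" when restoring from FP
//   pop   {r4, r7, pc}        ; LR reloaded straight into PC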
873 
874 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
875 /// debug info. It's the same as what we use for resolving the code-gen
876 /// references for now. FIXME: This can go wrong when references are
877 /// SP-relative and simple call frames aren't used.
878 int
879 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
880  unsigned &FrameReg) const {
881  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
882 }
883 
884 int
885 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
886  int FI, unsigned &FrameReg,
887  int SPAdj) const {
888  const MachineFrameInfo &MFI = MF.getFrameInfo();
889  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
890  MF.getSubtarget().getRegisterInfo());
891  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
892  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
893  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
894  bool isFixed = MFI.isFixedObjectIndex(FI);
895 
896  FrameReg = ARM::SP;
897  Offset += SPAdj;
898 
899  // SP can move around if there are allocas. We may also lose track of SP
900  // when emergency spilling inside a non-reserved call frame setup.
901  bool hasMovingSP = !hasReservedCallFrame(MF);
902 
903  // When dynamically realigning the stack, use the frame pointer for
904  // parameters, and the stack/base pointer for locals.
905  if (RegInfo->needsStackRealignment(MF)) {
906  assert(hasFP(MF) && "dynamic stack realignment without a FP!");
907  if (isFixed) {
908  FrameReg = RegInfo->getFrameRegister(MF);
909  Offset = FPOffset;
910  } else if (hasMovingSP) {
911  assert(RegInfo->hasBasePointer(MF) &&
912  "VLAs and dynamic stack alignment, but missing base pointer!");
913  FrameReg = RegInfo->getBaseRegister();
914  }
915  return Offset;
916  }
917 
918  // If there is a frame pointer, use it when we can.
919  if (hasFP(MF) && AFI->hasStackFrame()) {
920  // Use frame pointer to reference fixed objects. Use it for locals if
921  // there are VLAs (and thus the SP isn't reliable as a base).
922  if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
923  FrameReg = RegInfo->getFrameRegister(MF);
924  return FPOffset;
925  } else if (hasMovingSP) {
926  assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
927  if (AFI->isThumb2Function()) {
928  // Try to use the frame pointer if we can, else use the base pointer
929  // since it's available. This is handy for the emergency spill slot, in
930  // particular.
931  if (FPOffset >= -255 && FPOffset < 0) {
932  FrameReg = RegInfo->getFrameRegister(MF);
933  return FPOffset;
934  }
935  }
936  } else if (AFI->isThumbFunction()) {
937  // Prefer SP to base pointer, if the offset is suitably aligned and in
938  // range as the effective range of the immediate offset is bigger when
939  // basing off SP.
940  // Use add <rd>, sp, #<imm8>
941  // ldr <rd>, [sp, #<imm8>]
942  if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
943  return Offset;
944  // In Thumb2 mode, the negative offset is very limited. Try to avoid
945  // out of range references. ldr <rt>,[<rn>, #-<imm8>]
946  if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
947  FrameReg = RegInfo->getFrameRegister(MF);
948  return FPOffset;
949  }
950  } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
951  // Otherwise, use SP or FP, whichever is closer to the stack slot.
952  FrameReg = RegInfo->getFrameRegister(MF);
953  return FPOffset;
954  }
955  }
956  // Use the base pointer if we have one.
957  if (RegInfo->hasBasePointer(MF))
958  FrameReg = RegInfo->getBaseRegister();
959  return Offset;
960 }
961 
962 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
963  MachineBasicBlock::iterator MI,
964  const std::vector<CalleeSavedInfo> &CSI,
965  unsigned StmOpc, unsigned StrOpc,
966  bool NoGap,
967  bool(*Func)(unsigned, bool),
968  unsigned NumAlignedDPRCS2Regs,
969  unsigned MIFlags) const {
970  MachineFunction &MF = *MBB.getParent();
971  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
972  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
973 
974  DebugLoc DL;
975 
976  using RegAndKill = std::pair<unsigned, bool>;
977 
978  SmallVector<RegAndKill, 4> Regs;
979  unsigned i = CSI.size();
980  while (i != 0) {
981  unsigned LastReg = 0;
982  for (; i != 0; --i) {
983  unsigned Reg = CSI[i-1].getReg();
984  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
985 
986  // D-registers in the aligned area DPRCS2 are NOT spilled here.
987  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
988  continue;
989 
990  const MachineRegisterInfo &MRI = MF.getRegInfo();
991  bool isLiveIn = MRI.isLiveIn(Reg);
992  if (!isLiveIn && !MRI.isReserved(Reg))
993  MBB.addLiveIn(Reg);
994  // If NoGap is true, push consecutive registers and then leave the rest
995  // for other instructions. e.g.
996  // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
997  if (NoGap && LastReg && LastReg != Reg-1)
998  break;
999  LastReg = Reg;
1000  // Do not set a kill flag on values that are also marked as live-in. This
1001  // happens with the @llvm.returnaddress intrinsic and with arguments
1002  // passed in callee saved registers.
1003  // Omitting the kill flags is conservatively correct even if the live-in
1004  // is not used after all.
1005  Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1006  }
1007 
1008  if (Regs.empty())
1009  continue;
1010 
1011  llvm::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS,
1012  const RegAndKill &RHS) {
1013  return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1014  });
1015 
1016  if (Regs.size() > 1 || StrOpc== 0) {
1017  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1018  .addReg(ARM::SP)
1019  .setMIFlags(MIFlags)
1020  .add(predOps(ARMCC::AL));
1021  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1022  MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1023  } else if (Regs.size() == 1) {
1024  BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1025  .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1026  .addReg(ARM::SP)
1027  .setMIFlags(MIFlags)
1028  .addImm(-4)
1029  .add(predOps(ARMCC::AL));
1030  }
1031  Regs.clear();
1032 
1033  // Put any subsequent vpush instructions before this one: they will refer to
1034  // higher register numbers so need to be pushed first in order to preserve
1035  // monotonicity.
1036  if (MI != MBB.begin())
1037  --MI;
1038  }
1039 }
1040 
1041 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1042  MachineBasicBlock::iterator MI,
1043  std::vector<CalleeSavedInfo> &CSI,
1044  unsigned LdmOpc, unsigned LdrOpc,
1045  bool isVarArg, bool NoGap,
1046  bool(*Func)(unsigned, bool),
1047  unsigned NumAlignedDPRCS2Regs) const {
1048  MachineFunction &MF = *MBB.getParent();
1049  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1050  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1051  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1052  DebugLoc DL;
1053  bool isTailCall = false;
1054  bool isInterrupt = false;
1055  bool isTrap = false;
1056  if (MBB.end() != MI) {
1057  DL = MI->getDebugLoc();
1058  unsigned RetOpcode = MI->getOpcode();
1059  isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1060  isInterrupt =
1061  RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1062  isTrap =
1063  RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1064  RetOpcode == ARM::tTRAP;
1065  }
1066 
1067  SmallVector<unsigned, 4> Regs;
1068  unsigned i = CSI.size();
1069  while (i != 0) {
1070  unsigned LastReg = 0;
1071  bool DeleteRet = false;
1072  for (; i != 0; --i) {
1073  CalleeSavedInfo &Info = CSI[i-1];
1074  unsigned Reg = Info.getReg();
1075  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1076 
1077  // The aligned reloads from area DPRCS2 are not inserted here.
1078  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1079  continue;
1080 
1081  if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1082  !isTrap && STI.hasV5TOps()) {
1083  if (MBB.succ_empty()) {
1084  Reg = ARM::PC;
1085  // Fold the return instruction into the LDM.
1086  DeleteRet = true;
1087  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1088  // We 'restore' LR into PC so it is not live out of the return block:
1089  // Clear Restored bit.
1090  Info.setRestored(false);
1091  } else
1092  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1093  }
1094 
1095  // If NoGap is true, pop consecutive registers and then leave the rest
1096  // for other instructions. e.g.
1097  // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1098  if (NoGap && LastReg && LastReg != Reg-1)
1099  break;
1100 
1101  LastReg = Reg;
1102  Regs.push_back(Reg);
1103  }
1104 
1105  if (Regs.empty())
1106  continue;
1107 
1108  llvm::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) {
1109  return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1110  });
1111 
1112  if (Regs.size() > 1 || LdrOpc == 0) {
1113  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1114  .addReg(ARM::SP)
1115  .add(predOps(ARMCC::AL));
1116  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1117  MIB.addReg(Regs[i], getDefRegState(true));
1118  if (DeleteRet) {
1119  if (MI != MBB.end()) {
1120  MIB.copyImplicitOps(*MI);
1121  MI->eraseFromParent();
1122  }
1123  }
1124  MI = MIB;
1125  } else if (Regs.size() == 1) {
1126  // If we adjusted the reg to PC from LR above, switch it back here. We
1127  // only do that for LDM.
1128  if (Regs[0] == ARM::PC)
1129  Regs[0] = ARM::LR;
1130  MachineInstrBuilder MIB =
1131  BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1132  .addReg(ARM::SP, RegState::Define)
1133  .addReg(ARM::SP);
1134  // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1135  // that refactoring is complete (eventually).
1136  if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1137  MIB.addReg(0);
1138  MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1139  } else
1140  MIB.addImm(4);
1141  MIB.add(predOps(ARMCC::AL));
1142  }
1143  Regs.clear();
1144 
1145  // Put any subsequent vpop instructions after this one: they will refer to
1146  // higher register numbers so need to be popped afterwards.
1147  if (MI != MBB.end())
1148  ++MI;
1149  }
1150 }
1151 
1152 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1153 /// starting from d8. Also insert stack realignment code and leave the stack
1154 /// pointer pointing to the d8 spill slot.
1155 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1156  MachineBasicBlock::iterator MI,
1157  unsigned NumAlignedDPRCS2Regs,
1158  const std::vector<CalleeSavedInfo> &CSI,
1159  const TargetRegisterInfo *TRI) {
1160  MachineFunction &MF = *MBB.getParent();
1161  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1162  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1163  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1164  MachineFrameInfo &MFI = MF.getFrameInfo();
1165 
1166  // Mark the D-register spill slots as properly aligned. Since MFI computes
1167  // stack slot layout backwards, this can actually mean that the d-reg stack
1168  // slot offsets can be wrong. The offset for d8 will always be correct.
1169  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1170  unsigned DNum = CSI[i].getReg() - ARM::D8;
1171  if (DNum > NumAlignedDPRCS2Regs - 1)
1172  continue;
1173  int FI = CSI[i].getFrameIdx();
1174  // The even-numbered registers will be 16-byte aligned, the odd-numbered
1175  // registers will be 8-byte aligned.
1176  MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
1177 
1178  // The stack slot for D8 needs to be maximally aligned because this is
1179  // actually the point where we align the stack pointer. MachineFrameInfo
1180  // computes all offsets relative to the incoming stack pointer which is a
1181  // bit weird when realigning the stack. Any extra padding for this
1182  // over-alignment is not realized because the code inserted below adjusts
1183  // the stack pointer by numregs * 8 before aligning the stack pointer.
1184  if (DNum == 0)
1185  MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
1186  }
1187 
1188  // Move the stack pointer to the d8 spill slot, and align it at the same
1189  // time. Leave the stack slot address in the scratch register r4.
1190  //
1191  // sub r4, sp, #numregs * 8
1192  // bic r4, r4, #align - 1
1193  // mov sp, r4
1194  //
1195  bool isThumb = AFI->isThumbFunction();
1196  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1197  AFI->setShouldRestoreSPFromFP(true);
1198 
1199  // sub r4, sp, #numregs * 8
1200  // The immediate is <= 64, so it doesn't need any special encoding.
1201  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1202  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1203  .addReg(ARM::SP)
1204  .addImm(8 * NumAlignedDPRCS2Regs)
1205  .add(predOps(ARMCC::AL))
1206  .add(condCodeOp());
1207 
1208  unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
1209  // We must set parameter MustBeSingleInstruction to true, since
1210  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1211  // stack alignment. Luckily, this can always be done since all ARM
1212  // architecture versions that support Neon also support the BFC
1213  // instruction.
1214  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1215 
1216  // mov sp, r4
1217  // The stack pointer must be adjusted before spilling anything, otherwise
1218  // the stack slots could be clobbered by an interrupt handler.
1219  // Leave r4 live, it is used below.
1220  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1221  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1222  .addReg(ARM::R4)
1223  .add(predOps(ARMCC::AL));
1224  if (!isThumb)
1225  MIB.add(condCodeOp());
1226 
1227  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1228  // r4 holds the stack slot address.
1229  unsigned NextReg = ARM::D8;
1230 
1231  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1232  // The writeback is only needed when emitting two vst1.64 instructions.
1233  if (NumAlignedDPRCS2Regs >= 6) {
1234  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1235  &ARM::QQPRRegClass);
1236  MBB.addLiveIn(SupReg);
1237  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1238  .addReg(ARM::R4, RegState::Kill)
1239  .addImm(16)
1240  .addReg(NextReg)
1241  .addReg(SupReg, RegState::ImplicitKill)
1242  .add(predOps(ARMCC::AL));
1243  NextReg += 4;
1244  NumAlignedDPRCS2Regs -= 4;
1245  }
1246 
1247  // We won't modify r4 beyond this point. It currently points to the next
1248  // register to be spilled.
1249  unsigned R4BaseReg = NextReg;
1250 
1251  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1252  if (NumAlignedDPRCS2Regs >= 4) {
1253  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1254  &ARM::QQPRRegClass);
1255  MBB.addLiveIn(SupReg);
1256  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1257  .addReg(ARM::R4)
1258  .addImm(16)
1259  .addReg(NextReg)
1260  .addReg(SupReg, RegState::ImplicitKill)
1261  .add(predOps(ARMCC::AL));
1262  NextReg += 4;
1263  NumAlignedDPRCS2Regs -= 4;
1264  }
1265 
1266  // 16-byte aligned vst1.64 with 2 d-regs.
1267  if (NumAlignedDPRCS2Regs >= 2) {
1268  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1269  &ARM::QPRRegClass);
1270  MBB.addLiveIn(SupReg);
1271  BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1272  .addReg(ARM::R4)
1273  .addImm(16)
1274  .addReg(SupReg)
1275  .add(predOps(ARMCC::AL));
1276  NextReg += 2;
1277  NumAlignedDPRCS2Regs -= 2;
1278  }
1279 
1280  // Finally, use a vanilla vstr.64 for the odd last register.
1281  if (NumAlignedDPRCS2Regs) {
1282  MBB.addLiveIn(NextReg);
1283  // vstr.64 uses addrmode5 which has an offset scale of 4.
1284  BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1285  .addReg(NextReg)
1286  .addReg(ARM::R4)
1287  .addImm((NextReg - R4BaseReg) * 2)
1288  .add(predOps(ARMCC::AL));
1289  }
1290 
1291  // The last spill instruction inserted should kill the scratch register r4.
1292  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1293 }
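// Example sequence for NumAlignedDPRCS2Regs == 6 with 16-byte stack alignment
// (illustrative sketch of what the builders above produce):
//   sub     r4, sp, #48
//   bfc     r4, #0, #4
//   mov     sp, r4
//   vst1.64 {d8, d9, d10, d11}, [r4:128]!
//   vst1.64 {d12, d13}, [r4:128]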
1294 
1295 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1296 /// iterator to the following instruction.
1297 static MachineBasicBlock::iterator
1298 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1299  unsigned NumAlignedDPRCS2Regs) {
1300  // sub r4, sp, #numregs * 8
1301  // bic r4, r4, #align - 1
1302  // mov sp, r4
1303  ++MI; ++MI; ++MI;
1304  assert(MI->mayStore() && "Expecting spill instruction");
1305 
1306  // These switches all fall through.
1307  switch(NumAlignedDPRCS2Regs) {
1308  case 7:
1309  ++MI;
1310  assert(MI->mayStore() && "Expecting spill instruction");
1311  LLVM_FALLTHROUGH;
1312  default:
1313  ++MI;
1314  assert(MI->mayStore() && "Expecting spill instruction");
1315  LLVM_FALLTHROUGH;
1316  case 1:
1317  case 2:
1318  case 4:
1319  assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1320  ++MI;
1321  }
1322  return MI;
1323 }
1324 
1325 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1326 /// starting from d8. These instructions are assumed to execute while the
1327 /// stack is still aligned, unlike the code inserted by emitPopInst.
1328 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1329  MachineBasicBlock::iterator MI,
1330  unsigned NumAlignedDPRCS2Regs,
1331  const std::vector<CalleeSavedInfo> &CSI,
1332  const TargetRegisterInfo *TRI) {
1333  MachineFunction &MF = *MBB.getParent();
1334  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1335  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1336  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1337 
1338  // Find the frame index assigned to d8.
1339  int D8SpillFI = 0;
1340  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1341  if (CSI[i].getReg() == ARM::D8) {
1342  D8SpillFI = CSI[i].getFrameIdx();
1343  break;
1344  }
1345 
1346  // Materialize the address of the d8 spill slot into the scratch register r4.
1347  // This can be fairly complicated if the stack frame is large, so just use
1348  // the normal frame index elimination mechanism to do it. This code runs as
1349  // the initial part of the epilog where the stack and base pointers haven't
1350  // been changed yet.
1351  bool isThumb = AFI->isThumbFunction();
1352  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1353 
1354  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1355  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1356  .addFrameIndex(D8SpillFI)
1357  .addImm(0)
1358  .add(predOps(ARMCC::AL))
1359  .add(condCodeOp());
1360 
1361  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1362  unsigned NextReg = ARM::D8;
1363 
1364  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1365  if (NumAlignedDPRCS2Regs >= 6) {
1366  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1367  &ARM::QQPRRegClass);
1368  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1369  .addReg(ARM::R4, RegState::Define)
1370  .addReg(ARM::R4, RegState::Kill)
1371  .addImm(16)
1372  .addReg(SupReg, RegState::ImplicitDefine)
1373  .add(predOps(ARMCC::AL));
1374  NextReg += 4;
1375  NumAlignedDPRCS2Regs -= 4;
1376  }
1377 
1378  // We won't modify r4 beyond this point. It currently points to the next
1379  // register to be spilled.
1380  unsigned R4BaseReg = NextReg;
1381 
1382  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1383  if (NumAlignedDPRCS2Regs >= 4) {
1384  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1385  &ARM::QQPRRegClass);
1386  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1387  .addReg(ARM::R4)
1388  .addImm(16)
1389  .addReg(SupReg, RegState::ImplicitDefine)
1390  .add(predOps(ARMCC::AL));
1391  NextReg += 4;
1392  NumAlignedDPRCS2Regs -= 4;
1393  }
1394 
1395  // 16-byte aligned vld1.64 with 2 d-regs.
1396  if (NumAlignedDPRCS2Regs >= 2) {
1397  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1398  &ARM::QPRRegClass);
1399  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1400  .addReg(ARM::R4)
1401  .addImm(16)
1402  .add(predOps(ARMCC::AL));
1403  NextReg += 2;
1404  NumAlignedDPRCS2Regs -= 2;
1405  }
1406 
1407  // Finally, use a vanilla vldr.64 for the remaining odd register.
1408  if (NumAlignedDPRCS2Regs)
1409  BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1410  .addReg(ARM::R4)
1411  .addImm(2 * (NextReg - R4BaseReg))
1412  .add(predOps(ARMCC::AL));
1413 
1414  // The last reload kills r4.
1415  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1416 }
1417 
1418 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
1419  MachineBasicBlock::iterator MI,
1420  const std::vector<CalleeSavedInfo> &CSI,
1421  const TargetRegisterInfo *TRI) const {
1422  if (CSI.empty())
1423  return false;
1424 
1425  MachineFunction &MF = *MBB.getParent();
1426  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1427 
1428  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1429  unsigned PushOneOpc = AFI->isThumbFunction() ?
1430  ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1431  unsigned FltOpc = ARM::VSTMDDB_UPD;
1432  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1433  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1434  MachineInstr::FrameSetup);
1435  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1436  MachineInstr::FrameSetup);
1437  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1438  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1439 
1440  // The code above does not insert spill code for the aligned DPRCS2 registers.
1441  // The stack realignment code will be inserted between the push instructions
1442  // and these spills.
1443  if (NumAlignedDPRCS2Regs)
1444  emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1445 
1446  return true;
1447 }
1448 
1449 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1450  MachineBasicBlock::iterator MI,
1451  std::vector<CalleeSavedInfo> &CSI,
1452  const TargetRegisterInfo *TRI) const {
1453  if (CSI.empty())
1454  return false;
1455 
1456  MachineFunction &MF = *MBB.getParent();
1457  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1458  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1459  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1460 
1461  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1462  // registers. Do that here instead.
1463  if (NumAlignedDPRCS2Regs)
1464  emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1465 
1466  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1467  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1468  unsigned FltOpc = ARM::VLDMDIA_UPD;
1469  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1470  NumAlignedDPRCS2Regs);
1471  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1472  &isARMArea2Register, 0);
1473  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1474  &isARMArea1Register, 0);
1475 
1476  return true;
1477 }
1478 
1479 // FIXME: Make generic?
1480 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
1481  const ARMBaseInstrInfo &TII) {
1482  unsigned FnSize = 0;
1483  for (auto &MBB : MF) {
1484  for (auto &MI : MBB)
1485  FnSize += TII.getInstSizeInBytes(MI);
1486  }
1487  return FnSize;
1488 }
1489 
1490 /// estimateRSStackSizeLimit - Look at each instruction that references stack
1491 /// frames and return the stack size limit beyond which some of these
1492 /// instructions will require a scratch register during their expansion later.
1493 // FIXME: Move to TII?
1494 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
1495  const TargetFrameLowering *TFI) {
1496  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1497  unsigned Limit = (1 << 12) - 1;
1498  for (auto &MBB : MF) {
1499  for (auto &MI : MBB) {
1500  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1501  if (!MI.getOperand(i).isFI())
1502  continue;
1503 
1504  // When using ADDri to get the address of a stack object, 255 is the
1505  // largest offset guaranteed to fit in the immediate offset.
1506  if (MI.getOpcode() == ARM::ADDri) {
1507  Limit = std::min(Limit, (1U << 8) - 1);
1508  break;
1509  }
1510 
1511  // Otherwise check the addressing mode.
1512  switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
1513  case ARMII::AddrMode3:
1514  case ARMII::AddrModeT2_i8:
1515  Limit = std::min(Limit, (1U << 8) - 1);
1516  break;
1517  case ARMII::AddrMode5:
1518  case ARMII::AddrModeT2_i8s4:
1519  case ARMII::AddrModeT2_ldrex:
1520  Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1521  break;
1522  case ARMII::AddrModeT2_i12:
1523  // i12 supports only positive offset so these will be converted to
1524  // i8 opcodes. See llvm::rewriteT2FrameIndex.
1525  if (TFI->hasFP(MF) && AFI->hasStackFrame())
1526  Limit = std::min(Limit, (1U << 8) - 1);
1527  break;
1528  case ARMII::AddrMode4:
1529  case ARMII::AddrMode6:
1530  // Addressing modes 4 & 6 (load/store) instructions can't encode an
1531  // immediate offset for stack references.
1532  return 0;
1533  default:
1534  break;
1535  }
1536  break; // At most one FI per instruction
1537  }
1538  }
1539  }
1540 
1541  return Limit;
1542 }
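// Example (illustrative): a function whose frame references all use
// t2LDRi12/t2STRi12 keeps the full 4095-byte limit, a single AddrMode3 access
// (e.g. ldrd) lowers it to 255, and any vldm/vstm (AddrMode4/6) forces the
// limit to 0 so an emergency spill slot will be reserved.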
1543 
1544 // In functions that realign the stack, it can be an advantage to spill the
1545 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1546 // instructions take alignment hints that can improve performance.
1547 static void
1548 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
1549  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1550  if (!SpillAlignedNEONRegs)
1551  return;
1552 
1553  // Naked functions don't spill callee-saved registers.
1554  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
1555  return;
1556 
1557  // We are planning to use NEON instructions vst1 / vld1.
1558  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
1559  return;
1560 
1561  // Don't bother if the default stack alignment is sufficiently high.
1562  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
1563  return;
1564 
1565  // Aligned spills require stack realignment.
1566  if (!static_cast<const ARMBaseRegisterInfo *>(
1567  MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
1568  return;
1569 
1570  // We always spill contiguous d-registers starting from d8. Count how many
1571  // need spilling. The register allocator will almost always use the
1572  // callee-saved registers in order, but it can happen that there are holes in
1573  // the range. Registers above the hole will be spilled to the standard DPRCS
1574  // area.
1575  unsigned NumSpills = 0;
1576  for (; NumSpills < 8; ++NumSpills)
1577  if (!SavedRegs.test(ARM::D8 + NumSpills))
1578  break;
1579 
1580  // Don't do this for just one d-register. It's not worth it.
1581  if (NumSpills < 2)
1582  return;
1583 
1584  // Spill the first NumSpills D-registers after realigning the stack.
1585  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1586 
1587  // A scratch register is required for the vst1 / vld1 instructions.
1588  SavedRegs.set(ARM::R4);
1589 }
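// Example (illustrative): if the allocator used d8-d12, SavedRegs has d8..d12
// set and NumSpills becomes 5, so those five D-registers are spilled with
// aligned vst1 instructions after realignment and r4 is reserved as the
// scratch register for the address computation.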
1590 
1591 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
1592  BitVector &SavedRegs,
1593  RegScavenger *RS) const {
1594  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1595  // This tells PEI to spill the FP as if it is any other callee-save register
1596  // to take advantage of the eliminateFrameIndex machinery. This also ensures it
1597  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1598  // to combine multiple loads / stores.
1599  bool CanEliminateFrame = true;
1600  bool CS1Spilled = false;
1601  bool LRSpilled = false;
1602  unsigned NumGPRSpills = 0;
1603  unsigned NumFPRSpills = 0;
1604  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1605  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1606  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1607  MF.getSubtarget().getRegisterInfo());
1608  const ARMBaseInstrInfo &TII =
1609  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1610  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1611  MachineFrameInfo &MFI = MF.getFrameInfo();
1612  MachineRegisterInfo &MRI = MF.getRegInfo();
1613  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1614  (void)TRI; // Silence unused warning in non-assert builds.
1615  unsigned FramePtr = RegInfo->getFrameRegister(MF);
1616 
1617  // Spill R4 if a Thumb2 function requires stack realignment - it will be used
1618  // as a scratch register. Also spill R4 if a Thumb2 function has variable-sized
1619  // objects, since it's not always possible to restore sp from fp in a single
1620  // instruction.
1621  // FIXME: It would be better just to find a spare register here.
1622  if (AFI->isThumb2Function() &&
1623  (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1624  SavedRegs.set(ARM::R4);
1625 
1626  // If a stack probe will be emitted, spill R4 and LR, since they are
1627  // clobbered by the stack probe call.
1628  // This should be a safe, conservative estimate: the actual stack probe is
1629  // enabled based on the size of the local objects, whereas this estimate
1630  // also includes the varargs store size.
1631  if (STI.isTargetWindows() &&
1632  WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
1633  SavedRegs.set(ARM::R4);
1634  SavedRegs.set(ARM::LR);
1635  }
1636 
1637  if (AFI->isThumb1OnlyFunction()) {
1638  // Spill LR if Thumb1 function uses variable length argument lists.
1639  if (AFI->getArgRegsSaveSize() > 0)
1640  SavedRegs.set(ARM::LR);
1641 
1642  // Spill R4 if the Thumb1 epilogue has to restore SP from FP or the function
1643  // requires stack realignment. We don't know for sure what the stack size
1644  // will be, but an estimate is good enough for this decision. If anything
1645  // changes the estimate, it'll be a spill, which implies we've used all the
1646  // registers and so R4 is already in use, so not marking it here will be OK.
1647  // FIXME: It would be better just to find a spare register here.
1648  if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
1649  MFI.estimateStackSize(MF) > 508)
1650  SavedRegs.set(ARM::R4);
1651  }
1652 
1653  // See if we can spill vector registers to aligned stack.
1654  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
1655 
1656  // Spill the BasePtr if it's used.
1657  if (RegInfo->hasBasePointer(MF))
1658  SavedRegs.set(RegInfo->getBaseRegister());
1659 
1660  // Don't spill FP if the frame can be eliminated. This is determined
1661  // by scanning the callee-save registers to see if any is modified.
1662  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1663  for (unsigned i = 0; CSRegs[i]; ++i) {
1664  unsigned Reg = CSRegs[i];
1665  bool Spilled = false;
1666  if (SavedRegs.test(Reg)) {
1667  Spilled = true;
1668  CanEliminateFrame = false;
1669  }
1670 
1671  if (!ARM::GPRRegClass.contains(Reg)) {
1672  if (Spilled) {
1673  if (ARM::SPRRegClass.contains(Reg))
1674  NumFPRSpills++;
1675  else if (ARM::DPRRegClass.contains(Reg))
1676  NumFPRSpills += 2;
1677  else if (ARM::QPRRegClass.contains(Reg))
1678  NumFPRSpills += 4;
1679  }
1680  continue;
1681  }
1682 
1683  if (Spilled) {
1684  NumGPRSpills++;
1685 
1686  if (!STI.splitFramePushPop(MF)) {
1687  if (Reg == ARM::LR)
1688  LRSpilled = true;
1689  CS1Spilled = true;
1690  continue;
1691  }
1692 
1693  // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
1694  switch (Reg) {
1695  case ARM::LR:
1696  LRSpilled = true;
1697  LLVM_FALLTHROUGH;
1698  case ARM::R0: case ARM::R1:
1699  case ARM::R2: case ARM::R3:
1700  case ARM::R4: case ARM::R5:
1701  case ARM::R6: case ARM::R7:
1702  CS1Spilled = true;
1703  break;
1704  default:
1705  break;
1706  }
1707  } else {
1708  if (!STI.splitFramePushPop(MF)) {
1709  UnspilledCS1GPRs.push_back(Reg);
1710  continue;
1711  }
1712 
1713  switch (Reg) {
1714  case ARM::R0: case ARM::R1:
1715  case ARM::R2: case ARM::R3:
1716  case ARM::R4: case ARM::R5:
1717  case ARM::R6: case ARM::R7:
1718  case ARM::LR:
1719  UnspilledCS1GPRs.push_back(Reg);
1720  break;
1721  default:
1722  UnspilledCS2GPRs.push_back(Reg);
1723  break;
1724  }
1725  }
1726  }
1727 
1728  bool ForceLRSpill = false;
1729  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1730  unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
1731  // Force LR to be spilled if the Thumb function size is > 2048. This enables
1732  // the use of BL to implement a far jump. If it turns out that it's not
1733  // needed then the branch fix-up path will undo it.
1734  if (FnSize >= (1 << 11)) {
1735  CanEliminateFrame = false;
1736  ForceLRSpill = true;
1737  }
1738  }
1739 
1740  // If any of the stack slot references may be out of range of an immediate
1741  // offset, make sure a register (or a spill slot) is available for the
1742  // register scavenger. Note that if we're indexing off the frame pointer, the
1743  // effective stack size is 4 bytes larger since the FP points to the stack
1744  // slot of the previous FP. Also, if we have variable sized objects in the
1745  // function, stack slot references will often be negative, and some of
1746  // our instructions are positive-offset only, so conservatively consider
1747  // that case to want a spill slot (or register) as well. Similarly, if
1748  // the function adjusts the stack pointer during execution and the
1749  // adjustments aren't already part of our stack size estimate, our offset
1750  // calculations may be off, so be conservative.
1751  // FIXME: We could add logic to be more precise about negative offsets
1752  // and which instructions will need a scratch register for them. Is it
1753  // worth the effort and added fragility?
1754  unsigned EstimatedStackSize =
1755  MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
1756 
1757  // Determine biggest (positive) SP offset in MachineFrameInfo.
1758  int MaxFixedOffset = 0;
1759  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
1760  int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
1761  MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
1762  }
1763 
1764  bool HasFP = hasFP(MF);
1765  if (HasFP) {
1766  if (AFI->hasStackFrame())
1767  EstimatedStackSize += 4;
1768  } else {
1769  // If FP is not used, SP will be used to access arguments, so count the
1770  // size of arguments into the estimation.
1771  EstimatedStackSize += MaxFixedOffset;
1772  }
1773  EstimatedStackSize += 16; // For possible paddings.
1774 
1775  unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
1776  int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
1777  bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
1778  MFI.hasVarSizedObjects() ||
1779  (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
1780  // For large argument stacks, FP-relative addressing may overflow.
1781  (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
1782  if (BigFrameOffsets ||
1783  !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1784  AFI->setHasStackFrame(true);
1785 
1786  if (HasFP) {
1787  SavedRegs.set(FramePtr);
1788  // If the frame pointer is required by the ABI, also spill LR so that we
1789  // emit a complete frame record.
1790  if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
1791  SavedRegs.set(ARM::LR);
1792  LRSpilled = true;
1793  NumGPRSpills++;
1794  auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
1795  if (LRPos != UnspilledCS1GPRs.end())
1796  UnspilledCS1GPRs.erase(LRPos);
1797  }
1798  auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
1799  if (FPPos != UnspilledCS1GPRs.end())
1800  UnspilledCS1GPRs.erase(FPPos);
1801  NumGPRSpills++;
1802  if (FramePtr == ARM::R7)
1803  CS1Spilled = true;
1804  }
1805 
1806  // This is true when we inserted a spill for an unused register that can now
1807  // be used for register scavenging.
1808  bool ExtraCSSpill = false;
1809 
1810  if (AFI->isThumb1OnlyFunction()) {
1811  // For Thumb1-only targets, we need some low registers when we save and
1812  // restore the high registers (which aren't allocatable, but could be
1813  // used by inline assembly) because the push/pop instructions cannot
1814  // access high registers. If necessary, we might need to push more low
1815  // registers to ensure that there is at least one free that can be used
1816  // for the saving & restoring, and preferably we should ensure that as
1817  // many as are needed are available so that fewer push/pop instructions
1818  // are required.
1819 
1820  // Low registers which are not currently pushed, but could be (r4-r7).
1821  SmallVector<unsigned, 4> AvailableRegs;
1822 
1823  // Unused argument registers (r0-r3) can be clobbered in the prologue for
1824  // free.
1825  int EntryRegDeficit = 0;
1826  for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
1827  if (!MF.getRegInfo().isLiveIn(Reg)) {
1828  --EntryRegDeficit;
1829  LLVM_DEBUG(dbgs()
1830  << printReg(Reg, TRI)
1831  << " is unused argument register, EntryRegDeficit = "
1832  << EntryRegDeficit << "\n");
1833  }
1834  }
1835 
1836  // Unused return registers can be clobbered in the epilogue for free.
1837  int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
1838  LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
1839  << " return regs used, ExitRegDeficit = "
1840  << ExitRegDeficit << "\n");
1841 
1842  int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
1843  LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
1844 
1845  // r4-r6 can be used in the prologue if they are pushed by the first push
1846  // instruction.
1847  for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
1848  if (SavedRegs.test(Reg)) {
1849  --RegDeficit;
1850  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1851  << " is saved low register, RegDeficit = "
1852  << RegDeficit << "\n");
1853  } else {
1854  AvailableRegs.push_back(Reg);
1855  LLVM_DEBUG(
1856  dbgs()
1857  << printReg(Reg, TRI)
1858  << " is non-saved low register, adding to AvailableRegs\n");
1859  }
1860  }
1861 
1862  // r7 can be used if it is not being used as the frame pointer.
1863  if (!HasFP) {
1864  if (SavedRegs.test(ARM::R7)) {
1865  --RegDeficit;
1866  LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
1867  << RegDeficit << "\n");
1868  } else {
1869  AvailableRegs.push_back(ARM::R7);
1870  LLVM_DEBUG(
1871  dbgs()
1872  << "%r7 is non-saved low register, adding to AvailableRegs\n");
1873  }
1874  }
1875 
1876  // Each of r8-r11 needs to be copied to a low register, then pushed.
1877  for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
1878  if (SavedRegs.test(Reg)) {
1879  ++RegDeficit;
1880  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1881  << " is saved high register, RegDeficit = "
1882  << RegDeficit << "\n");
1883  }
1884  }
1885 
1886  // LR can only be used by PUSH, not POP, and can't be used at all if the
1887  // llvm.returnaddress intrinsic is used. This is only worth doing if we
1888  // are more limited at function entry than exit.
1889  if ((EntryRegDeficit > ExitRegDeficit) &&
1890  !(MF.getRegInfo().isLiveIn(ARM::LR) &&
1891  MF.getFrameInfo().isReturnAddressTaken())) {
1892  if (SavedRegs.test(ARM::LR)) {
1893  --RegDeficit;
1894  LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
1895  << RegDeficit << "\n");
1896  } else {
1897  AvailableRegs.push_back(ARM::LR);
1898  LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
1899  }
1900  }
1901 
1902  // If there are more high registers that need pushing than low registers
1903  // available, push some more low registers so that we can use fewer push
1904  // instructions. This might not reduce RegDeficit all the way to zero,
1905  // because we can only guarantee that r4-r6 are available, but r8-r11 may
1906  // need saving.
1907  LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
1908  for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
1909  unsigned Reg = AvailableRegs.pop_back_val();
1910  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1911  << " to make up reg deficit\n");
1912  SavedRegs.set(Reg);
1913  NumGPRSpills++;
1914  CS1Spilled = true;
1915  assert(!MRI.isReserved(Reg) && "Should not be reserved");
1916  if (!MRI.isPhysRegUsed(Reg))
1917  ExtraCSSpill = true;
1918  UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
1919  if (Reg == ARM::LR)
1920  LRSpilled = true;
1921  }
1922  LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
1923  << "\n");
1924  }
1925 
1926  // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
1927  // restore LR in that case.
1928  bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
1929 
1930  // If LR is not spilled, but at least one of R4, R5, R6, or R7 is spilled,
1931  // spill LR as well so we can fold BX_RET into the register restore (LDM).
1932  if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
1933  SavedRegs.set(ARM::LR);
1934  NumGPRSpills++;
1935  SmallVectorImpl<unsigned>::iterator LRPos;
1936  LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
1937  if (LRPos != UnspilledCS1GPRs.end())
1938  UnspilledCS1GPRs.erase(LRPos);
1939 
1940  ForceLRSpill = false;
1941  if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR))
1942  ExtraCSSpill = true;
1943  }
1944 
1945  // If stack and double are 8-byte aligned and we are spilling an odd number
1946  // of GPRs, spill one extra callee save GPR so we won't have to pad between
1947  // the integer and double callee save areas.
1948  LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
1949  unsigned TargetAlign = getStackAlignment();
1950  if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
1951  if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
1952  for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
1953  unsigned Reg = UnspilledCS1GPRs[i];
1954  // Don't spill a high register if the function is Thumb. In the case of
1955  // Windows on ARM, accept R11 (the frame pointer).
1956  if (!AFI->isThumbFunction() ||
1957  (STI.isTargetWindows() && Reg == ARM::R11) ||
1958  isARMLowRegister(Reg) ||
1959  (Reg == ARM::LR && !ExpensiveLRRestore)) {
1960  SavedRegs.set(Reg);
1961  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1962  << " to make up alignment\n");
1963  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1964  ExtraCSSpill = true;
1965  break;
1966  }
1967  }
1968  } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
1969  unsigned Reg = UnspilledCS2GPRs.front();
1970  SavedRegs.set(Reg);
1971  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1972  << " to make up alignment\n");
1973  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1974  ExtraCSSpill = true;
1975  }
1976  }
1977 
1978  // Estimate if we might need to scavenge a register at some point in order
1979  // to materialize a stack offset. If so, either spill one additional
1980  // callee-saved register or reserve a special spill slot to facilitate
1981  // register scavenging. Thumb1 needs a spill slot for stack pointer
1982  // adjustments also, even when the frame itself is small.
1983  if (BigFrameOffsets && !ExtraCSSpill) {
1984  // If any non-reserved CS register isn't spilled, just spill one or two
1985  // extra. That should take care of it!
1986  unsigned NumExtras = TargetAlign / 4;
1987  SmallVector<unsigned, 2> Extras;
1988  while (NumExtras && !UnspilledCS1GPRs.empty()) {
1989  unsigned Reg = UnspilledCS1GPRs.back();
1990  UnspilledCS1GPRs.pop_back();
1991  if (!MRI.isReserved(Reg) &&
1992  (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
1993  Reg == ARM::LR)) {
1994  Extras.push_back(Reg);
1995  NumExtras--;
1996  }
1997  }
1998  // For non-Thumb1 functions, also check for hi-reg CS registers
1999  if (!AFI->isThumb1OnlyFunction()) {
2000  while (NumExtras && !UnspilledCS2GPRs.empty()) {
2001  unsigned Reg = UnspilledCS2GPRs.back();
2002  UnspilledCS2GPRs.pop_back();
2003  if (!MRI.isReserved(Reg)) {
2004  Extras.push_back(Reg);
2005  NumExtras--;
2006  }
2007  }
2008  }
2009  if (NumExtras == 0) {
2010  for (unsigned Reg : Extras) {
2011  SavedRegs.set(Reg);
2012  if (!MRI.isPhysRegUsed(Reg))
2013  ExtraCSSpill = true;
2014  }
2015  }
2016  if (!ExtraCSSpill && !AFI->isThumb1OnlyFunction()) {
2017  // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
2018  // closest to SP or frame pointer.
2019  assert(RS && "Register scavenging not provided");
2020  const TargetRegisterClass &RC = ARM::GPRRegClass;
2021  unsigned Size = TRI->getSpillSize(RC);
2022  unsigned Align = TRI->getSpillAlignment(RC);
2023  RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2024  }
2025  }
2026  }
2027 
2028  if (ForceLRSpill) {
2029  SavedRegs.set(ARM::LR);
2030  AFI->setLRIsSpilledForFarJump(true);
2031  }
2032 }
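To make the Thumb1 register-deficit bookkeeping above a little more concrete, here is a hand-worked example with hypothetical numbers; plain integers stand in for the MachineFunction queries, and none of the values come from a real function.

#include <algorithm>

// Hypothetical worked example of the RegDeficit computation above.
int exampleRegDeficit() {
  int EntryRegDeficit = -2;   // say r2 and r3 are unused argument registers
  int ExitRegDeficit = 1 - 4; // say only r0 carries a return value
  int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit); // == -2
  RegDeficit -= 1;            // r4 is already being saved by the first push
  RegDeficit += 2;            // r8 and r9 are high registers needing copies
  return RegDeficit;          // -1: no extra low registers need to be pushed
}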
2033 
2034 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2035  MachineFunction &MF, MachineBasicBlock &MBB,
2036  MachineBasicBlock::iterator I) const {
2037  const ARMBaseInstrInfo &TII =
2038  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2039  if (!hasReservedCallFrame(MF)) {
2040  // If we have alloca, convert as follows:
2041  // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2042  // ADJCALLSTACKUP -> add, sp, sp, amount
2043  MachineInstr &Old = *I;
2044  DebugLoc dl = Old.getDebugLoc();
2045  unsigned Amount = TII.getFrameSize(Old);
2046  if (Amount != 0) {
2047  // We need to keep the stack aligned properly. To do this, we round the
2048  // amount of space needed for the outgoing arguments up to the next
2049  // alignment boundary.
2050  Amount = alignSPAdjust(Amount);
2051 
2052  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2053  assert(!AFI->isThumb1OnlyFunction() &&
2054  "This eliminateCallFramePseudoInstr does not support Thumb1!");
2055  bool isARM = !AFI->isThumbFunction();
2056 
2057  // Replace the pseudo instruction with a new instruction...
2058  unsigned Opc = Old.getOpcode();
2059  int PIdx = Old.findFirstPredOperandIdx();
2060  ARMCC::CondCodes Pred =
2061  (PIdx == -1) ? ARMCC::AL
2062  : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
2063  unsigned PredReg = TII.getFramePred(Old);
2064  if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2065  emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2066  Pred, PredReg);
2067  } else {
2068  assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2069  emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2070  Pred, PredReg);
2071  }
2072  }
2073  }
2074  return MBB.erase(I);
2075 }
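For a concrete picture of the rewrite above, the rounding applied to the call-frame amount can be sketched as follows. This is a hypothetical helper for illustration only; the real rounding is done by alignSPAdjust, inherited from TargetFrameLowering, and the sketch assumes a non-negative Amount.

// Illustrative sketch: round a call-frame adjustment up to the stack alignment.
static int roundToStackAlign(int Amount, int StackAlign) {
  return (Amount + StackAlign - 1) / StackAlign * StackAlign;
}
// With an 8-byte stack alignment, roundToStackAlign(12, 8) == 16, so
// ADJCALLSTACKDOWN 12 becomes "sub sp, sp, #16" and the matching
// ADJCALLSTACKUP 12 becomes "add sp, sp, #16".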
2076 
2077 /// Get the minimum constant for ARM that is greater than or equal to the
2078 /// argument. In ARM, constants can have any value that can be produced by
2079 /// rotating an 8-bit value to the right by an even number of bits within a
2080 /// 32-bit word.
2081 static uint32_t alignToARMConstant(uint32_t Value) {
2082  unsigned Shifted = 0;
2083 
2084  if (Value == 0)
2085  return 0;
2086 
2087  while (!(Value & 0xC0000000)) {
2088  Value = Value << 2;
2089  Shifted += 2;
2090  }
2091 
2092  bool Carry = (Value & 0x00FFFFFF);
2093  Value = ((Value & 0xFF000000) >> 24) + Carry;
2094 
2095  if (Value & 0x0000100)
2096  Value = Value & 0x000001FC;
2097 
2098  if (Shifted > 24)
2099  Value = Value >> (Shifted - 24);
2100  else
2101  Value = Value << (24 - Shifted);
2102 
2103  return Value;
2104 }
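A few hand-worked values for the function above may help. These were traced by hand against the rotate-an-8-bit-immediate rule, so treat them as a sanity check rather than authoritative test vectors; the harness itself is hypothetical and not part of this file.

#include <cassert>

// Hypothetical checks for alignToARMConstant (hand-computed, not from a test).
void alignToARMConstantExamples() {
  assert(alignToARMConstant(0) == 0);                   // zero is returned as-is
  assert(alignToARMConstant(5) == 5);                   // already encodable
  assert(alignToARMConstant(257) == 260);               // 0x101 rounds up to 0x104
  assert(alignToARMConstant(0x12345678) == 0x12400000); // 0x49 rotated into bits 22..29
}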
2105 
2106 // The stack limit in the TCB is set to this many bytes above the actual
2107 // stack limit.
2108 static const uint64_t kSplitStackAvailable = 256;
2109 
2110 // Adjust the function prologue to enable split stacks. This currently only
2111 // supports android and linux.
2112 //
2113 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2114 // must be well defined in order to allow for consistent implementations of the
2115 // __morestack helper function. The ABI is also not a normal ABI in that it
2116 // doesn't follow the normal calling conventions because this allows the
2117 // prologue of each function to be optimized further.
2118 //
2119 // Currently, the ABI looks like (when calling __morestack)
2120 //
2121 // * r4 holds the minimum stack size requested for this function call
2122 // * r5 holds the stack size of the arguments to the function
2123 // * the beginning of the function is 3 instructions after the call to
2124 // __morestack
2125 //
2126 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2127 // place the arguments on to the new stack, and the 3-instruction knowledge to
2128 // jump directly to the body of the function when working on the new stack.
2129 //
2130 // An old (and possibly no longer compatible) implementation of __morestack for
2131 // ARM can be found at [1].
2132 //
2133 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
2134 void ARMFrameLowering::adjustForSegmentedStacks(
2135  MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2136  unsigned Opcode;
2137  unsigned CFIIndex;
2138  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2139  bool Thumb = ST->isThumb();
2140 
2141  // Sadly, this currently doesn't support varargs or platforms other than
2142  // Android/Linux. Note that both Thumb1 and Thumb2 are supported there.
2143  if (MF.getFunction().isVarArg())
2144  report_fatal_error("Segmented stacks do not support vararg functions.");
2145  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2146  report_fatal_error("Segmented stacks not supported on this platform.");
2147 
2148  MachineFrameInfo &MFI = MF.getFrameInfo();
2149  MachineModuleInfo &MMI = MF.getMMI();
2150  MCContext &Context = MMI.getContext();
2151  const MCRegisterInfo *MRI = Context.getRegisterInfo();
2152  const ARMBaseInstrInfo &TII =
2153  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2154  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
2155  DebugLoc DL;
2156 
2157  uint64_t StackSize = MFI.getStackSize();
2158 
2159  // Do not generate a prologue for leaf functions with a stack of size zero.
2160  // For non-leaf functions we have to allow for the possibility that the
2161  // call is to a non-split function, as in PR37807.
2162  if (StackSize == 0 && !MFI.hasTailCall())
2163  return;
2164 
2165  // Use R4 and R5 as scratch registers.
2166  // We save R4 and R5 before use and restore them before leaving the function.
2167  unsigned ScratchReg0 = ARM::R4;
2168  unsigned ScratchReg1 = ARM::R5;
2169  uint64_t AlignedStackSize;
2170 
2171  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2172  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2173  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
2174  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
2175  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
2176 
2177  // Grab all the blocks that reach PrologueMBB so we can update their liveness as well.
2178  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2179  SmallVector<MachineBasicBlock *, 2> WalkList;
2180  WalkList.push_back(&PrologueMBB);
2181 
2182  do {
2183  MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2184  for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2185  if (BeforePrologueRegion.insert(PredBB).second)
2186  WalkList.push_back(PredBB);
2187  }
2188  } while (!WalkList.empty());
2189 
2190  // The order in that list is important.
2191  // The blocks will all be inserted before PrologueMBB using that order.
2192  // Therefore the block that should appear first in the CFG should appear
2193  // first in the list.
2194  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2195  PostStackMBB};
2196 
2197  for (MachineBasicBlock *B : AddedBlocks)
2198  BeforePrologueRegion.insert(B);
2199 
2200  for (const auto &LI : PrologueMBB.liveins()) {
2201  for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2202  PredBB->addLiveIn(LI);
2203  }
2204 
2205  // Remove the newly added blocks from the list, since we know
2206  // we do not have to do the following updates for them.
2207  for (MachineBasicBlock *B : AddedBlocks) {
2208  BeforePrologueRegion.erase(B);
2209  MF.insert(PrologueMBB.getIterator(), B);
2210  }
2211 
2212  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2213  // Make sure the LiveIns are still sorted and unique.
2214  MBB->sortUniqueLiveIns();
2215  // Replace the edges to PrologueMBB by edges to the sequences
2216  // we are about to add.
2217  MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2218  }
2219 
2220  // The required stack size, aligned so it satisfies the ARM constant-encoding criterion.
2221  AlignedStackSize = alignToARMConstant(StackSize);
2222 
2223  // When the frame size is less than 256 we just compare the stack
2224  // boundary directly to the value of the stack pointer, per gcc.
2225  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
2226 
2227  // We will use two of the callee save registers as scratch registers so we
2228  // need to save those registers onto the stack.
2229  // We will use SR0 to hold stack limit and SR1 to hold the stack size
2230  // requested and arguments for __morestack().
2231  // SR0: Scratch Register #0
2232  // SR1: Scratch Register #1
2233  // push {SR0, SR1}
2234  if (Thumb) {
2235  BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
2236  .add(predOps(ARMCC::AL))
2237  .addReg(ScratchReg0)
2238  .addReg(ScratchReg1);
2239  } else {
2240  BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
2241  .addReg(ARM::SP, RegState::Define)
2242  .addReg(ARM::SP)
2243  .add(predOps(ARMCC::AL))
2244  .addReg(ScratchReg0)
2245  .addReg(ScratchReg1);
2246  }
2247 
2248  // Emit the relevant DWARF information about the change in stack pointer as
2249  // well as where to find both r4 and r5 (the callee-save registers)
2250  CFIIndex =
2251  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
2252  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2253  .addCFIIndex(CFIIndex);
2254  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2255  nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
2256  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2257  .addCFIIndex(CFIIndex);
2258  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2259  nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
2260  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2261  .addCFIIndex(CFIIndex);
2262 
2263  // mov SR1, sp
2264  if (Thumb) {
2265  BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
2266  .addReg(ARM::SP)
2267  .add(predOps(ARMCC::AL));
2268  } else if (CompareStackPointer) {
2269  BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
2270  .addReg(ARM::SP)
2271  .add(predOps(ARMCC::AL))
2272  .add(condCodeOp());
2273  }
2274 
2275  // sub SR1, sp, #StackSize
2276  if (!CompareStackPointer && Thumb) {
2277  BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
2278  .add(condCodeOp())
2279  .addReg(ScratchReg1)
2280  .addImm(AlignedStackSize)
2281  .add(predOps(ARMCC::AL));
2282  } else if (!CompareStackPointer) {
2283  BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
2284  .addReg(ARM::SP)
2285  .addImm(AlignedStackSize)
2286  .add(predOps(ARMCC::AL))
2287  .add(condCodeOp());
2288  }
2289 
2290  if (Thumb && ST->isThumb1Only()) {
2291  unsigned PCLabelId = ARMFI->createPICLabelUId();
2292  ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
2293  MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
2294  MachineConstantPool *MCP = MF.getConstantPool();
2295  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
2296 
2297  // ldr SR0, [pc, offset(STACK_LIMIT)]
2298  BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
2299  .addConstantPoolIndex(CPI)
2300  .add(predOps(ARMCC::AL));
2301 
2302  // ldr SR0, [SR0]
2303  BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
2304  .addReg(ScratchReg0)
2305  .addImm(0)
2306  .add(predOps(ARMCC::AL));
2307  } else {
2308  // Get TLS base address from the coprocessor
2309  // mrc p15, #0, SR0, c13, c0, #3
2310  BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
2311  .addImm(15)
2312  .addImm(0)
2313  .addImm(13)
2314  .addImm(0)
2315  .addImm(3)
2316  .add(predOps(ARMCC::AL));
2317 
2318  // Use the last TLS slot on Android and a private field of the TCB on Linux.
2319  assert(ST->isTargetAndroid() || ST->isTargetLinux());
2320  unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
2321 
2322  // Get the stack limit from the right offset
2323  // ldr SR0, [sr0, #4 * TlsOffset]
2324  BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
2325  .addReg(ScratchReg0)
2326  .addImm(4 * TlsOffset)
2327  .add(predOps(ARMCC::AL));
2328  }
2329 
2330  // Compare stack limit with stack size requested.
2331  // cmp SR0, SR1
2332  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
2333  BuildMI(GetMBB, DL, TII.get(Opcode))
2334  .addReg(ScratchReg0)
2335  .addReg(ScratchReg1)
2336  .add(predOps(ARMCC::AL));
2337 
2338  // This jump is taken if StackLimit < SP - stack required.
2339  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
2340  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
2341  .addImm(ARMCC::LO)
2342  .addReg(ARM::CPSR);
2343 
2344 
2345  // Call __morestack(StackSize, Size of stack arguments).
2346  // __morestack knows that the stack size requested is in SR0 (r4)
2347  // and that the size of the stack arguments is in SR1 (r5).
2348 
2349  // Pass the first argument to __morestack in Scratch Register #0:
2350  // the amount of stack required.
2351  if (Thumb) {
2352  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
2353  .add(condCodeOp())
2354  .addImm(AlignedStackSize)
2355  .add(predOps(ARMCC::AL));
2356  } else {
2357  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
2358  .addImm(AlignedStackSize)
2359  .add(predOps(ARMCC::AL))
2360  .add(condCodeOp());
2361  }
2362  // Pass the second argument to __morestack in Scratch Register #1:
2363  // the amount of stack consumed to save the function arguments.
2364  if (Thumb) {
2365  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
2366  .add(condCodeOp())
2367  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2368  .add(predOps(ARMCC::AL));
2369  } else {
2370  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
2371  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2372  .add(predOps(ARMCC::AL))
2373  .add(condCodeOp());
2374  }
2375 
2376  // push {lr} - Save return address of this function.
2377  if (Thumb) {
2378  BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
2379  .add(predOps(ARMCC::AL))
2380  .addReg(ARM::LR);
2381  } else {
2382  BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
2383  .addReg(ARM::SP, RegState::Define)
2384  .addReg(ARM::SP)
2385  .add(predOps(ARMCC::AL))
2386  .addReg(ARM::LR);
2387  }
2388 
2389  // Emit the DWARF info about the change in stack as well as where to find the
2390  // previous link register
2391  CFIIndex =
2392  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
2393  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2394  .addCFIIndex(CFIIndex);
2395  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2396  nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
2397  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2398  .addCFIIndex(CFIIndex);
2399 
2400  // Call __morestack().
2401  if (Thumb) {
2402  BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
2403  .add(predOps(ARMCC::AL))
2404  .addExternalSymbol("__morestack");
2405  } else {
2406  BuildMI(AllocMBB, DL, TII.get(ARM::BL))
2407  .addExternalSymbol("__morestack");
2408  }
2409 
2410  // pop {lr} - Restore return address of this original function.
2411  if (Thumb) {
2412  if (ST->isThumb1Only()) {
2413  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2414  .add(predOps(ARMCC::AL))
2415  .addReg(ScratchReg0);
2416  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
2417  .addReg(ScratchReg0)
2418  .add(predOps(ARMCC::AL));
2419  } else {
2420  BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
2421  .addReg(ARM::LR, RegState::Define)
2422  .addReg(ARM::SP, RegState::Define)
2423  .addReg(ARM::SP)
2424  .addImm(4)
2425  .add(predOps(ARMCC::AL));
2426  }
2427  } else {
2428  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2429  .addReg(ARM::SP, RegState::Define)
2430  .addReg(ARM::SP)
2431  .add(predOps(ARMCC::AL))
2432  .addReg(ARM::LR);
2433  }
2434 
2435  // Restore SR0 and SR1 in case __morestack() was called.
2436  // __morestack() will skip the PostStackMBB block, so we need to restore
2437  // the scratch registers here.
2438  // pop {SR0, SR1}
2439  if (Thumb) {
2440  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2441  .add(predOps(ARMCC::AL))
2442  .addReg(ScratchReg0)
2443  .addReg(ScratchReg1);
2444  } else {
2445  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2446  .addReg(ARM::SP, RegState::Define)
2447  .addReg(ARM::SP)
2448  .add(predOps(ARMCC::AL))
2449  .addReg(ScratchReg0)
2450  .addReg(ScratchReg1);
2451  }
2452 
2453  // Update the CFA offset now that we've popped
2454  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2455  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2456  .addCFIIndex(CFIIndex);
2457 
2458  // Return from this function.
2459  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
2460 
2461  // Restore SR0 and SR1 in case __morestack() was not called.
2462  // pop {SR0, SR1}
2463  if (Thumb) {
2464  BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
2465  .add(predOps(ARMCC::AL))
2466  .addReg(ScratchReg0)
2467  .addReg(ScratchReg1);
2468  } else {
2469  BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
2470  .addReg(ARM::SP, RegState::Define)
2471  .addReg(ARM::SP)
2472  .add(predOps(ARMCC::AL))
2473  .addReg(ScratchReg0)
2474  .addReg(ScratchReg1);
2475  }
2476 
2477  // Update the CFA offset now that we've popped
2478  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2479  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2480  .addCFIIndex(CFIIndex);
2481 
2482  // Tell debuggers that r4 and r5 are now the same as they were in the
2483  // previous function, that they're the "Same Value".
2484  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2485  nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
2486  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2487  .addCFIIndex(CFIIndex);
2488  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2489  nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
2490  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2491  .addCFIIndex(CFIIndex);
2492 
2493  // Organize the successor lists of the new MBBs.
2494  PostStackMBB->addSuccessor(&PrologueMBB);
2495 
2496  AllocMBB->addSuccessor(PostStackMBB);
2497 
2498  GetMBB->addSuccessor(PostStackMBB);
2499  GetMBB->addSuccessor(AllocMBB);
2500 
2501  McrMBB->addSuccessor(GetMBB);
2502 
2503  PrevStackMBB->addSuccessor(McrMBB);
2504 
2505 #ifdef EXPENSIVE_CHECKS
2506  MF.verify();
2507 #endif
2508 }
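Finally, the comparison the prologue above builds can be summarized in C++. This is an illustrative sketch only, not emitted code; it assumes StackLimit and SP hold the values loaded into SR0 and SR1, and the function name is hypothetical.

#include <cstdint>

// Illustrative sketch of the __morestack decision made by the cmp/bcc above.
static bool needsMoreStack(uint64_t SP, uint64_t StackLimit,
                           uint64_t AlignedStackSize) {
  const uint64_t kSplitStackAvailable = 256; // TCB limit is biased by this much
  uint64_t Needed = AlignedStackSize < kSplitStackAvailable
                        ? SP                     // small frame: compare sp itself
                        : SP - AlignedStackSize; // otherwise sp minus frame size
  return StackLimit >= Needed; // call __morestack unless the limit is below it
}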