1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the ARM implementation of TargetFrameLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMFrameLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMConstantPoolValue.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
22 #include "Utils/ARMBaseInfo.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/MC/MCContext.h"
46 #include "llvm/MC/MCDwarf.h"
47 #include "llvm/MC/MCInstrDesc.h"
48 #include "llvm/MC/MCRegisterInfo.h"
49 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstddef>
61 #include <cstdint>
62 #include <iterator>
63 #include <utility>
64 #include <vector>
65 
66 #define DEBUG_TYPE "arm-frame-lowering"
67 
68 using namespace llvm;
69 
70 static cl::opt<bool>
71 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
72  cl::desc("Align ARM NEON spills in prolog and epilog"));
73 
74 static MachineBasicBlock::iterator
75 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
76                         unsigned NumAlignedDPRCS2Regs);
77 
78 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
79     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
80  STI(sti) {}
81 
82 bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
83   // iOS always has a FP for backtracking, force other targets to keep their FP
84  // when doing FastISel. The emitted code is currently superior, and in cases
85  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
86   return TargetFrameLowering::noFramePointerElim(MF) ||
87          MF.getSubtarget<ARMSubtarget>().useFastISel();
88 }
89 
90 /// hasFP - Return true if the specified function should have a dedicated frame
91 /// pointer register. This is true if the function has variable sized allocas
92 /// or if frame pointer elimination is disabled.
93 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
94   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
95  const MachineFrameInfo &MFI = MF.getFrameInfo();
96 
97   // ABI-required frame pointer.
98   if (MF.getTarget().Options.DisableFramePointerElim(MF))
99     return true;
100 
101  // Frame pointer required for use within this function.
102  return (RegInfo->needsStackRealignment(MF) ||
103  MFI.hasVarSizedObjects() ||
104  MFI.isFrameAddressTaken());
105 }
106 
107 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
108 /// not required, we reserve argument space for call sites in the function
109 /// immediately on entry to the current function. This eliminates the need for
110 /// add/sub sp brackets around call sites. Returns true if the call frame is
111 /// included as part of the stack frame.
112 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
113   const MachineFrameInfo &MFI = MF.getFrameInfo();
114  unsigned CFSize = MFI.getMaxCallFrameSize();
115   // It's not always a good idea to include the call frame as part of the
116   // stack frame. ARM (especially Thumb) has only small immediate offsets for
117   // addressing the stack frame, so a large call frame can cause poor codegen
118   // and may even make it impossible to scavenge a register.
119  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
120  return false;
121 
122  return !MFI.hasVarSizedObjects();
123 }
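// Added illustrative note (not part of the original source): (1 << 12) - 1 is
// 4095, so the cutoff above is 2047 bytes. A function whose largest call site
// needs 2047 or more bytes of outgoing argument space keeps explicit sub/add
// sp adjustments around its calls instead of a reserved call frame.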
124 
125 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
126 /// call frame pseudos can be simplified. Unlike most targets, having a FP
127 /// is not sufficient here since we still may reference some objects via SP
128 /// even when FP is available in Thumb2 mode.
129 bool
130 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
131   return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
132 }
133 
134 static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
135                         const MCPhysReg *CSRegs) {
136  // Integer spill area is handled with "pop".
137  if (isPopOpcode(MI.getOpcode())) {
138  // The first two operands are predicates. The last two are
139  // imp-def and imp-use of SP. Check everything in between.
140  for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
141  if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
142  return false;
143  return true;
144  }
145  if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
146  MI.getOpcode() == ARM::LDR_POST_REG ||
147  MI.getOpcode() == ARM::t2LDR_POST) &&
148  isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
149  MI.getOperand(1).getReg() == ARM::SP)
150  return true;
151 
152  return false;
153 }
154 
155 static void emitRegPlusImmediate(
156     bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
157  const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
158  unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
159  ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
160  if (isARM)
161  emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
162  Pred, PredReg, TII, MIFlags);
163  else
164  emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
165  Pred, PredReg, TII, MIFlags);
166 }
167 
168 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
169  MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
170  const ARMBaseInstrInfo &TII, int NumBytes,
171  unsigned MIFlags = MachineInstr::NoFlags,
172                          ARMCC::CondCodes Pred = ARMCC::AL,
173                          unsigned PredReg = 0) {
174  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
175  MIFlags, Pred, PredReg);
176 }
177 
178 static int sizeOfSPAdjustment(const MachineInstr &MI) {
179  int RegSize;
180  switch (MI.getOpcode()) {
181  case ARM::VSTMDDB_UPD:
182  RegSize = 8;
183  break;
184  case ARM::STMDB_UPD:
185  case ARM::t2STMDB_UPD:
186  RegSize = 4;
187  break;
188  case ARM::t2STR_PRE:
189  case ARM::STR_PRE_IMM:
190  return 4;
191  default:
192  llvm_unreachable("Unknown push or pop like instruction");
193  }
194 
195  int count = 0;
196  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
197  // pred) so the list starts at 4.
198  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
199  count += RegSize;
200  return count;
201 }
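// Added illustrative note: the register list starts at operand 4 (after the
// updated sp, the source sp and the two predicate operands), so for a
// "vpush {d8-d11}" (VSTMDDB_UPD) the loop above counts four registers at
// 8 bytes each, i.e. a 32-byte SP adjustment.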
202 
203 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
204                                       size_t StackSizeInBytes) {
205  const MachineFrameInfo &MFI = MF.getFrameInfo();
206  const Function *F = MF.getFunction();
207  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
208  if (F->hasFnAttribute("stack-probe-size"))
209  F->getFnAttribute("stack-probe-size")
210         .getValueAsString()
211         .getAsInteger(0, StackProbeSize);
212  return StackSizeInBytes >= StackProbeSize;
213 }
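// Added illustrative note: with the default 4096-byte probe size, a function
// needing 4100 bytes of stack takes the __chkstk path in emitPrologue below,
// while 4092 bytes is allocated with a plain SP adjustment; the presence of a
// stack protector lowers the threshold to 4080 bytes.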
214 
215 namespace {
216 
217 struct StackAdjustingInsts {
218  struct InstInfo {
219    MachineBasicBlock::iterator I;
220    unsigned SPAdjust;
221  bool BeforeFPSet;
222  };
223 
224  SmallVector<InstInfo, 4> Insts;
225 
226  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
227  bool BeforeFPSet = false) {
228  InstInfo Info = {I, SPAdjust, BeforeFPSet};
229  Insts.push_back(Info);
230  }
231 
232  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
233  auto Info =
234  llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
235  assert(Info != Insts.end() && "invalid sp adjusting instruction");
236  Info->SPAdjust += ExtraBytes;
237  }
238 
239  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
240  const ARMBaseInstrInfo &TII, bool HasFP) {
241  MachineFunction &MF = *MBB.getParent();
242  unsigned CFAOffset = 0;
243  for (auto &Info : Insts) {
244  if (HasFP && !Info.BeforeFPSet)
245  return;
246 
247  CFAOffset -= Info.SPAdjust;
248  unsigned CFIIndex = MF.addFrameInst(
249  MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
250  BuildMI(MBB, std::next(Info.I), dl,
251  TII.get(TargetOpcode::CFI_INSTRUCTION))
252  .addCFIIndex(CFIIndex)
253           .setMIFlags(MachineInstr::FrameSetup);
254     }
255  }
256 };
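// Added illustrative note: for a prologue of "push {r4-r7, lr}" (SPAdjust 20)
// followed by "sub sp, sp, #8" (SPAdjust 8), emitDefCFAOffsets above would
// emit ".cfi_def_cfa_offset 20" after the push and ".cfi_def_cfa_offset 28"
// after the sub, since CFAOffset accumulates over the recorded instructions.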
257 
258 } // end anonymous namespace
259 
260 /// Emit an instruction sequence that will align the address in
261 /// register Reg by zero-ing out the lower bits. For versions of the
262 /// architecture that support Neon, this must be done in a single
263 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
264 /// single instruction. That function only gets called when optimizing
265 /// spilling of D registers on a core with the Neon instruction set
266 /// present.
267 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
268                                      const TargetInstrInfo &TII,
269  MachineBasicBlock &MBB,
270                                      MachineBasicBlock::iterator MBBI,
271                                      const DebugLoc &DL, const unsigned Reg,
272  const unsigned Alignment,
273  const bool MustBeSingleInstruction) {
274  const ARMSubtarget &AST =
275  static_cast<const ARMSubtarget &>(MF.getSubtarget());
276  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
277  const unsigned AlignMask = Alignment - 1;
278  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
279  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
280  if (!AFI->isThumbFunction()) {
281  // if the BFC instruction is available, use that to zero the lower
282  // bits:
283  // bfc Reg, #0, log2(Alignment)
284  // otherwise use BIC, if the mask to zero the required number of bits
285  // can be encoded in the bic immediate field
286  // bic Reg, Reg, Alignment-1
287  // otherwise, emit
288  // lsr Reg, Reg, log2(Alignment)
289  // lsl Reg, Reg, log2(Alignment)
290  if (CanUseBFC) {
291  BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
292  .addReg(Reg, RegState::Kill)
293  .addImm(~AlignMask)
294  .add(predOps(ARMCC::AL));
295  } else if (AlignMask <= 255) {
296  BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
297  .addReg(Reg, RegState::Kill)
298  .addImm(AlignMask)
299           .add(predOps(ARMCC::AL))
300           .add(condCodeOp());
301  } else {
302  assert(!MustBeSingleInstruction &&
303  "Shouldn't call emitAligningInstructions demanding a single "
304  "instruction to be emitted for large stack alignment for a target "
305  "without BFC.");
306  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
307  .addReg(Reg, RegState::Kill)
308  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
309           .add(predOps(ARMCC::AL))
310           .add(condCodeOp());
311  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
312  .addReg(Reg, RegState::Kill)
313  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
314           .add(predOps(ARMCC::AL))
315           .add(condCodeOp());
316  }
317  } else {
318  // Since this is only reached for Thumb-2 targets, the BFC instruction
319  // should always be available.
320  assert(CanUseBFC);
321  BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
322  .addReg(Reg, RegState::Kill)
323  .addImm(~AlignMask)
324  .add(predOps(ARMCC::AL));
325  }
326 }
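// Added illustrative note: for a 16-byte alignment request this produces
// "bfc Reg, #0, #4" on v6T2/v7 targets (and always in Thumb2), or
// "bic Reg, Reg, #15" when BFC is unavailable but the mask is encodable;
// the lsr/lsl pair is only used for alignments whose mask cannot be encoded,
// which is why the aligned NEON spill path can insist on a single instruction.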
327 
328 /// We need the offset of the frame pointer relative to other MachineFrameInfo
329 /// offsets which are encoded relative to SP at function begin.
330 /// See also emitPrologue() for how the FP is set up.
331 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
332 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
333 /// this to produce a conservative estimate that we check in an assert() later.
334 static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
335  // This is a conservative estimation: Assume the frame pointer being r7 and
336  // pc("r15") up to r8 getting spilled before (= 8 registers).
337  return -AFI.getArgRegsSaveSize() - (8 * 4);
338 }
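// Added illustrative note: with no GPR argument register spills
// (ArgRegsSaveSize == 0) the estimate is -32, i.e. the frame pointer spill
// slot is assumed to be at most eight 4-byte pushes below the incoming SP.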
339 
340 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
341                                     MachineBasicBlock &MBB) const {
342  MachineBasicBlock::iterator MBBI = MBB.begin();
343  MachineFrameInfo &MFI = MF.getFrameInfo();
344  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
345  MachineModuleInfo &MMI = MF.getMMI();
346  MCContext &Context = MMI.getContext();
347  const TargetMachine &TM = MF.getTarget();
348  const MCRegisterInfo *MRI = Context.getRegisterInfo();
349  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
350  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
351  assert(!AFI->isThumb1OnlyFunction() &&
352  "This emitPrologue does not support Thumb1!");
353  bool isARM = !AFI->isThumbFunction();
354  unsigned Align = STI.getFrameLowering()->getStackAlignment();
355  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
356  unsigned NumBytes = MFI.getStackSize();
357  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
358 
359  // Debug location must be unknown since the first debug location is used
360  // to determine the end of the prologue.
361  DebugLoc dl;
362 
363  unsigned FramePtr = RegInfo->getFrameRegister(MF);
364 
365  // Determine the sizes of each callee-save spill area and record which frame
366  // index belongs to which callee-save spill area.
367  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
368  int FramePtrSpillFI = 0;
369  int D8SpillFI = 0;
370 
371  // All calls are tail calls in GHC calling conv, and functions have no
372  // prologue/epilogue.
373  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
374    return;
375 
376  StackAdjustingInsts DefCFAOffsetCandidates;
377  bool HasFP = hasFP(MF);
378 
379  // Allocate the vararg register save area.
380  if (ArgRegsSaveSize) {
381  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
382                  MachineInstr::FrameSetup);
383    DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
384  }
385 
386  if (!AFI->hasStackFrame() &&
387  (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
388  if (NumBytes - ArgRegsSaveSize != 0) {
389  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
390                    MachineInstr::FrameSetup);
391      DefCFAOffsetCandidates.addInst(std::prev(MBBI),
392  NumBytes - ArgRegsSaveSize, true);
393  }
394  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
395  return;
396  }
397 
398  // Determine spill area sizes.
399  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
400  unsigned Reg = CSI[i].getReg();
401  int FI = CSI[i].getFrameIdx();
402  switch (Reg) {
403  case ARM::R8:
404  case ARM::R9:
405  case ARM::R10:
406  case ARM::R11:
407  case ARM::R12:
408  if (STI.splitFramePushPop(MF)) {
409  GPRCS2Size += 4;
410  break;
411  }
412      LLVM_FALLTHROUGH;
413    case ARM::R0:
414  case ARM::R1:
415  case ARM::R2:
416  case ARM::R3:
417  case ARM::R4:
418  case ARM::R5:
419  case ARM::R6:
420  case ARM::R7:
421  case ARM::LR:
422  if (Reg == FramePtr)
423  FramePtrSpillFI = FI;
424  GPRCS1Size += 4;
425  break;
426  default:
427  // This is a DPR. Exclude the aligned DPRCS2 spills.
428  if (Reg == ARM::D8)
429  D8SpillFI = FI;
430  if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
431  DPRCSSize += 8;
432  }
433  }
434 
435  // Move past area 1.
436  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
437  if (GPRCS1Size > 0) {
438  GPRCS1Push = LastPush = MBBI++;
439  DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
440  }
441 
442  // Determine starting offsets of spill areas.
443  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
444  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
445  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
446  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
447  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
448  int FramePtrOffsetInPush = 0;
449  if (HasFP) {
450  int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
451  assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset &&
452  "Max FP estimation is wrong");
453  FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
454  AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
455  NumBytes);
456  }
457  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
458  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
459  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
460 
461  // Move past area 2.
462  if (GPRCS2Size > 0) {
463  GPRCS2Push = LastPush = MBBI++;
464  DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
465  }
466 
467  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
468  // .cfi_offset operations will reflect that.
469  if (DPRGapSize) {
470  assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
471  if (LastPush != MBB.end() &&
472  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
473  DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
474  else {
475  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
476                    MachineInstr::FrameSetup);
477      DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
478  }
479  }
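  // Added illustrative note: with GPRCS1Size == 20 (e.g. push {r4-r7, lr}),
  // GPRCS2Size == 0 and no vararg save area, the sum above is 20, so
  // DPRGapSize is 4 and a 4-byte adjustment is emitted (or folded into the
  // push) to keep the following vpush area 8-byte aligned.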
480 
481  // Move past area 3.
482  if (DPRCSSize > 0) {
483  // Since vpush register list cannot have gaps, there may be multiple vpush
484  // instructions in the prologue.
485  while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
486  DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
487  LastPush = MBBI++;
488  }
489  }
490 
491  // Move past the aligned DPRCS2 area.
492  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
493    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
494    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
495  // leaves the stack pointer pointing to the DPRCS2 area.
496  //
497  // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
498  NumBytes += MFI.getObjectOffset(D8SpillFI);
499  } else
500  NumBytes = DPRCSOffset;
501 
502  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
503  uint32_t NumWords = NumBytes >> 2;
504 
505  if (NumWords < 65536)
506  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
507  .addImm(NumWords)
508          .setMIFlags(MachineInstr::FrameSetup)
509          .add(predOps(ARMCC::AL));
510  else
511  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
512  .addImm(NumWords)
513          .setMIFlags(MachineInstr::FrameSetup);
514 
515  switch (TM.getCodeModel()) {
516  case CodeModel::Small:
517  case CodeModel::Medium:
518  case CodeModel::Kernel:
519  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
520          .add(predOps(ARMCC::AL))
521          .addExternalSymbol("__chkstk")
522  .addReg(ARM::R4, RegState::Implicit)
523  .setMIFlags(MachineInstr::FrameSetup);
524  break;
525  case CodeModel::Large:
526  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
527  .addExternalSymbol("__chkstk")
528          .setMIFlags(MachineInstr::FrameSetup);
529 
530  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
531          .add(predOps(ARMCC::AL))
532          .addReg(ARM::R12, RegState::Kill)
533  .addReg(ARM::R4, RegState::Implicit)
534  .setMIFlags(MachineInstr::FrameSetup);
535  break;
536  }
537 
538  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
539  .addReg(ARM::SP, RegState::Kill)
540        .addReg(ARM::R4, RegState::Kill)
541        .setMIFlags(MachineInstr::FrameSetup)
542        .add(predOps(ARMCC::AL))
543        .add(condCodeOp());
544  NumBytes = 0;
545  }
546 
547  if (NumBytes) {
548  // Adjust SP after all the callee-save spills.
549  if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
550  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
551  DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
552  else {
553  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
554                    MachineInstr::FrameSetup);
555      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
556  }
557 
558  if (HasFP && isARM)
559  // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
560  // Note it's not safe to do this in Thumb2 mode because it would have
561  // taken two instructions:
562  // mov sp, r7
563  // sub sp, #24
564  // If an interrupt is taken between the two instructions, then sp is in
565  // an inconsistent state (pointing to the middle of callee-saved area).
566  // The interrupt handler can end up clobbering the registers.
567  AFI->setShouldRestoreSPFromFP(true);
568  }
569 
570  // Set FP to point to the stack slot that contains the previous FP.
571  // For iOS, FP is R7, which has now been stored in spill area 1.
572  // Otherwise, if this is not iOS, all the callee-saved registers go
573  // into spill area 1, including the FP in R11. In either case, it
574  // is in area one and the adjustment needs to take place just after
575  // that push.
576  if (HasFP) {
577  MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
578  unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
579  emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
580  dl, TII, FramePtr, ARM::SP,
581  PushSize + FramePtrOffsetInPush,
582                          MachineInstr::FrameSetup);
583    if (FramePtrOffsetInPush + PushSize != 0) {
584  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
585  nullptr, MRI->getDwarfRegNum(FramePtr, true),
586  -(ArgRegsSaveSize - FramePtrOffsetInPush)));
587  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
588  .addCFIIndex(CFIIndex)
589          .setMIFlags(MachineInstr::FrameSetup);
590    } else {
591  unsigned CFIIndex =
592          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
593              nullptr, MRI->getDwarfRegNum(FramePtr, true)));
594  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
595  .addCFIIndex(CFIIndex)
596          .setMIFlags(MachineInstr::FrameSetup);
597    }
598  }
599 
600  // Now that the prologue's actual instructions are finalised, we can insert
601 // the necessary DWARF CFI instructions to describe the situation. Start by
602  // recording where each register ended up:
603  if (GPRCS1Size > 0) {
604  MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
605  int CFIIndex;
606  for (const auto &Entry : CSI) {
607  unsigned Reg = Entry.getReg();
608  int FI = Entry.getFrameIdx();
609  switch (Reg) {
610  case ARM::R8:
611  case ARM::R9:
612  case ARM::R10:
613  case ARM::R11:
614  case ARM::R12:
615  if (STI.splitFramePushPop(MF))
616  break;
617        LLVM_FALLTHROUGH;
618      case ARM::R0:
619  case ARM::R1:
620  case ARM::R2:
621  case ARM::R3:
622  case ARM::R4:
623  case ARM::R5:
624  case ARM::R6:
625  case ARM::R7:
626  case ARM::LR:
627        CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
628            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
629  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
630  .addCFIIndex(CFIIndex)
631            .setMIFlags(MachineInstr::FrameSetup);
632        break;
633  }
634  }
635  }
636 
637  if (GPRCS2Size > 0) {
638  MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
639  for (const auto &Entry : CSI) {
640  unsigned Reg = Entry.getReg();
641  int FI = Entry.getFrameIdx();
642  switch (Reg) {
643  case ARM::R8:
644  case ARM::R9:
645  case ARM::R10:
646  case ARM::R11:
647  case ARM::R12:
648  if (STI.splitFramePushPop(MF)) {
649  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
650  unsigned Offset = MFI.getObjectOffset(FI);
651  unsigned CFIIndex = MF.addFrameInst(
652  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
653  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
654  .addCFIIndex(CFIIndex)
655              .setMIFlags(MachineInstr::FrameSetup);
656        }
657  break;
658  }
659  }
660  }
661 
662  if (DPRCSSize > 0) {
663  // Since vpush register list cannot have gaps, there may be multiple vpush
664  // instructions in the prologue.
665  MachineBasicBlock::iterator Pos = std::next(LastPush);
666  for (const auto &Entry : CSI) {
667  unsigned Reg = Entry.getReg();
668  int FI = Entry.getFrameIdx();
669  if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
670  (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
671  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
672  unsigned Offset = MFI.getObjectOffset(FI);
673  unsigned CFIIndex = MF.addFrameInst(
674  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
675  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
676  .addCFIIndex(CFIIndex)
677            .setMIFlags(MachineInstr::FrameSetup);
678      }
679  }
680  }
681 
682  // Now we can emit descriptions of where the canonical frame address was
683  // throughout the process. If we have a frame pointer, it takes over the job
684  // half-way through, so only the first few .cfi_def_cfa_offset instructions
685  // actually get emitted.
686  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
687 
688  if (STI.isTargetELF() && hasFP(MF))
689    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
690                            AFI->getFramePtrSpillOffset());
691 
692  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
693  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
694  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
695  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
696 
697  // If we need dynamic stack realignment, do it here. Be paranoid and make
698  // sure if we also have VLAs, we have a base pointer for frame access.
699  // If aligned NEON registers were spilled, the stack has already been
700  // realigned.
701  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
702  unsigned MaxAlign = MFI.getMaxAlignment();
703  assert(!AFI->isThumb1OnlyFunction());
704  if (!AFI->isThumbFunction()) {
705  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
706  false);
707  } else {
708  // We cannot use sp as source/dest register here, thus we're using r4 to
709  // perform the calculations. We're emitting the following sequence:
710  // mov r4, sp
711  // -- use emitAligningInstructions to produce best sequence to zero
712  // -- out lower bits in r4
713  // mov sp, r4
714  // FIXME: It will be better just to find spare register here.
715  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
716  .addReg(ARM::SP, RegState::Kill)
717  .add(predOps(ARMCC::AL));
718  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
719  false);
720  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
721  .addReg(ARM::R4, RegState::Kill)
722  .add(predOps(ARMCC::AL));
723  }
724 
725  AFI->setShouldRestoreSPFromFP(true);
726  }
727 
728  // If we need a base pointer, set it up here. It's whatever the value
729  // of the stack pointer is at this point. Any variable size objects
730  // will be allocated after this, so we can still use the base pointer
731  // to reference locals.
732  // FIXME: Clarify FrameSetup flags here.
733  if (RegInfo->hasBasePointer(MF)) {
734  if (isARM)
735  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
736  .addReg(ARM::SP)
737          .add(predOps(ARMCC::AL))
738          .add(condCodeOp());
739  else
740  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
741  .addReg(ARM::SP)
742  .add(predOps(ARMCC::AL));
743  }
744 
745  // If the frame has variable sized objects then the epilogue must restore
746  // the sp from fp. We can assume there's an FP here since hasFP already
747  // checks for hasVarSizedObjects.
748  if (MFI.hasVarSizedObjects())
749  AFI->setShouldRestoreSPFromFP(true);
750 }
751 
752 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
753                                     MachineBasicBlock &MBB) const {
754  MachineFrameInfo &MFI = MF.getFrameInfo();
755  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
756  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
757  const ARMBaseInstrInfo &TII =
758  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
759  assert(!AFI->isThumb1OnlyFunction() &&
760  "This emitEpilogue does not support Thumb1!");
761  bool isARM = !AFI->isThumbFunction();
762 
763  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
764  int NumBytes = (int)MFI.getStackSize();
765  unsigned FramePtr = RegInfo->getFrameRegister(MF);
766 
767  // All calls are tail calls in GHC calling conv, and functions have no
768  // prologue/epilogue.
770  return;
771 
772  // First put ourselves on the first (from top) terminator instruction.
773  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
774  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
775 
776  if (!AFI->hasStackFrame()) {
777  if (NumBytes - ArgRegsSaveSize != 0)
778  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
779  } else {
780  // Unwind MBBI to point to first LDR / VLDRD.
781  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
782  if (MBBI != MBB.begin()) {
783  do {
784  --MBBI;
785  } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
786  if (!isCSRestore(*MBBI, TII, CSRegs))
787  ++MBBI;
788  }
789 
790  // Move SP to start of FP callee save spill area.
791    NumBytes -= (ArgRegsSaveSize +
792                 AFI->getGPRCalleeSavedArea1Size() +
793                 AFI->getGPRCalleeSavedArea2Size() +
794                 AFI->getDPRCalleeSavedGapSize() +
795                 AFI->getDPRCalleeSavedAreaSize());
796 
797  // Reset SP based on frame pointer only if the stack frame extends beyond
798  // frame pointer stack slot or target is ELF and the function has FP.
799  if (AFI->shouldRestoreSPFromFP()) {
800  NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
801  if (NumBytes) {
802  if (isARM)
803  emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
804  ARMCC::AL, 0, TII);
805  else {
806  // It's not possible to restore SP from FP in a single instruction.
807  // For iOS, this looks like:
808  // mov sp, r7
809  // sub sp, #24
810  // This is bad, if an interrupt is taken after the mov, sp is in an
811  // inconsistent state.
812  // Use the first callee-saved register as a scratch register.
813  assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
814  "No scratch register to restore SP from FP!");
815  emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
816  ARMCC::AL, 0, TII);
817  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
818  .addReg(ARM::R4)
819  .add(predOps(ARMCC::AL));
820  }
821  } else {
822  // Thumb2 or ARM.
823  if (isARM)
824  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
825  .addReg(FramePtr)
826            .add(predOps(ARMCC::AL))
827            .add(condCodeOp());
828  else
829  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
830  .addReg(FramePtr)
831  .add(predOps(ARMCC::AL));
832  }
833  } else if (NumBytes &&
834  !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
835  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
836 
837  // Increment past our save areas.
838  if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
839  MBBI++;
840  // Since vpop register list cannot have gaps, there may be multiple vpop
841  // instructions in the epilogue.
842  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
843  MBBI++;
844  }
845  if (AFI->getDPRCalleeSavedGapSize()) {
846  assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
847  "unexpected DPR alignment gap");
848  emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
849  }
850 
851  if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
852  if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
853  }
854 
855  if (ArgRegsSaveSize)
856  emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
857 }
858 
859 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
860 /// debug info. It's the same as what we use for resolving the code-gen
861 /// references for now. FIXME: This can go wrong when references are
862 /// SP-relative and simple call frames aren't used.
863 int
864 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
865                                          unsigned &FrameReg) const {
866  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
867 }
868 
869 int
870 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
871                                              int FI, unsigned &FrameReg,
872  int SPAdj) const {
873  const MachineFrameInfo &MFI = MF.getFrameInfo();
874  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
875      MF.getSubtarget().getRegisterInfo());
876  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
877  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
878  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
879  bool isFixed = MFI.isFixedObjectIndex(FI);
880 
881  FrameReg = ARM::SP;
882  Offset += SPAdj;
883 
884  // SP can move around if there are allocas. We may also lose track of SP
885  // when emergency spilling inside a non-reserved call frame setup.
886  bool hasMovingSP = !hasReservedCallFrame(MF);
887 
888  // When dynamically realigning the stack, use the frame pointer for
889  // parameters, and the stack/base pointer for locals.
890  if (RegInfo->needsStackRealignment(MF)) {
891  assert(hasFP(MF) && "dynamic stack realignment without a FP!");
892  if (isFixed) {
893  FrameReg = RegInfo->getFrameRegister(MF);
894  Offset = FPOffset;
895  } else if (hasMovingSP) {
896  assert(RegInfo->hasBasePointer(MF) &&
897  "VLAs and dynamic stack alignment, but missing base pointer!");
898  FrameReg = RegInfo->getBaseRegister();
899  }
900  return Offset;
901  }
902 
903  // If there is a frame pointer, use it when we can.
904  if (hasFP(MF) && AFI->hasStackFrame()) {
905  // Use frame pointer to reference fixed objects. Use it for locals if
906  // there are VLAs (and thus the SP isn't reliable as a base).
907  if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
908  FrameReg = RegInfo->getFrameRegister(MF);
909  return FPOffset;
910  } else if (hasMovingSP) {
911  assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
912  if (AFI->isThumb2Function()) {
913  // Try to use the frame pointer if we can, else use the base pointer
914  // since it's available. This is handy for the emergency spill slot, in
915  // particular.
916  if (FPOffset >= -255 && FPOffset < 0) {
917  FrameReg = RegInfo->getFrameRegister(MF);
918  return FPOffset;
919  }
920  }
921  } else if (AFI->isThumb2Function()) {
922  // Use add <rd>, sp, #<imm8>
923  // ldr <rd>, [sp, #<imm8>]
924  // if at all possible to save space.
925  if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
926  return Offset;
927  // In Thumb2 mode, the negative offset is very limited. Try to avoid
928  // out of range references. ldr <rt>,[<rn>, #-<imm8>]
929  if (FPOffset >= -255 && FPOffset < 0) {
930  FrameReg = RegInfo->getFrameRegister(MF);
931  return FPOffset;
932  }
933  } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
934  // Otherwise, use SP or FP, whichever is closer to the stack slot.
935  FrameReg = RegInfo->getFrameRegister(MF);
936  return FPOffset;
937  }
938  }
939  // Use the base pointer if we have one.
940  if (RegInfo->hasBasePointer(MF))
941  FrameReg = RegInfo->getBaseRegister();
942  return Offset;
943 }
944 
945 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
946                                    MachineBasicBlock::iterator MI,
947                                    const std::vector<CalleeSavedInfo> &CSI,
948  unsigned StmOpc, unsigned StrOpc,
949  bool NoGap,
950  bool(*Func)(unsigned, bool),
951  unsigned NumAlignedDPRCS2Regs,
952  unsigned MIFlags) const {
953  MachineFunction &MF = *MBB.getParent();
954  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
955  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
956 
957  DebugLoc DL;
958 
959  using RegAndKill = std::pair<unsigned, bool>;
960 
962  unsigned i = CSI.size();
963  while (i != 0) {
964  unsigned LastReg = 0;
965  for (; i != 0; --i) {
966  unsigned Reg = CSI[i-1].getReg();
967  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
968 
969  // D-registers in the aligned area DPRCS2 are NOT spilled here.
970  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
971  continue;
972 
973  const MachineRegisterInfo &MRI = MF.getRegInfo();
974  bool isLiveIn = MRI.isLiveIn(Reg);
975  if (!isLiveIn && !MRI.isReserved(Reg))
976  MBB.addLiveIn(Reg);
977  // If NoGap is true, push consecutive registers and then leave the rest
978  // for other instructions. e.g.
979  // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
980  if (NoGap && LastReg && LastReg != Reg-1)
981  break;
982  LastReg = Reg;
983  // Do not set a kill flag on values that are also marked as live-in. This
984  // happens with the @llvm.returnaddress intrinsic and with arguments
985  // passed in callee saved registers.
986  // Omitting the kill flags is conservatively correct even if the live-in
987  // is not used after all.
988  Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
989  }
990 
991  if (Regs.empty())
992  continue;
993 
994  std::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS,
995  const RegAndKill &RHS) {
996  return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
997  });
998 
999  if (Regs.size() > 1 || StrOpc== 0) {
1000  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1001  .addReg(ARM::SP)
1002  .setMIFlags(MIFlags)
1003  .add(predOps(ARMCC::AL));
1004  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1005  MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1006  } else if (Regs.size() == 1) {
1007  BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1008  .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1009  .addReg(ARM::SP)
1010  .setMIFlags(MIFlags)
1011  .addImm(-4)
1012  .add(predOps(ARMCC::AL));
1013  }
1014  Regs.clear();
1015 
1016  // Put any subsequent vpush instructions before this one: they will refer to
1017  // higher register numbers so need to be pushed first in order to preserve
1018  // monotonicity.
1019  if (MI != MBB.begin())
1020  --MI;
1021  }
1022 }
1023 
1024 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1025                                   MachineBasicBlock::iterator MI,
1026                                   std::vector<CalleeSavedInfo> &CSI,
1027  unsigned LdmOpc, unsigned LdrOpc,
1028  bool isVarArg, bool NoGap,
1029  bool(*Func)(unsigned, bool),
1030  unsigned NumAlignedDPRCS2Regs) const {
1031  MachineFunction &MF = *MBB.getParent();
1032  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1033  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1034  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1035  DebugLoc DL;
1036  bool isTailCall = false;
1037  bool isInterrupt = false;
1038  bool isTrap = false;
1039  if (MBB.end() != MI) {
1040  DL = MI->getDebugLoc();
1041  unsigned RetOpcode = MI->getOpcode();
1042  isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1043  isInterrupt =
1044  RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1045  isTrap =
1046  RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1047  RetOpcode == ARM::tTRAP;
1048  }
1049 
1050  SmallVector<unsigned, 4> Regs;
1051  unsigned i = CSI.size();
1052  while (i != 0) {
1053  unsigned LastReg = 0;
1054  bool DeleteRet = false;
1055  for (; i != 0; --i) {
1056  CalleeSavedInfo &Info = CSI[i-1];
1057  unsigned Reg = Info.getReg();
1058  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1059 
1060  // The aligned reloads from area DPRCS2 are not inserted here.
1061  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1062  continue;
1063 
1064  if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1065  !isTrap && STI.hasV5TOps()) {
1066  if (MBB.succ_empty()) {
1067  Reg = ARM::PC;
1068  DeleteRet = true;
1069  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1070  // We 'restore' LR into PC so it is not live out of the return block:
1071  // Clear Restored bit.
1072  Info.setRestored(false);
1073  } else
1074  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1075  // Fold the return instruction into the LDM.
1076  }
1077 
1078  // If NoGap is true, pop consecutive registers and then leave the rest
1079  // for other instructions. e.g.
1080  // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1081  if (NoGap && LastReg && LastReg != Reg-1)
1082  break;
1083 
1084  LastReg = Reg;
1085  Regs.push_back(Reg);
1086  }
1087 
1088  if (Regs.empty())
1089  continue;
1090 
1091  std::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) {
1092  return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1093  });
1094 
1095  if (Regs.size() > 1 || LdrOpc == 0) {
1096  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1097  .addReg(ARM::SP)
1098  .add(predOps(ARMCC::AL));
1099  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1100  MIB.addReg(Regs[i], getDefRegState(true));
1101  if (DeleteRet) {
1102  if (MI != MBB.end()) {
1103  MIB.copyImplicitOps(*MI);
1104  MI->eraseFromParent();
1105  }
1106  }
1107  MI = MIB;
1108  } else if (Regs.size() == 1) {
1109  // If we adjusted the reg to PC from LR above, switch it back here. We
1110  // only do that for LDM.
1111  if (Regs[0] == ARM::PC)
1112  Regs[0] = ARM::LR;
1113  MachineInstrBuilder MIB =
1114  BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1115  .addReg(ARM::SP, RegState::Define)
1116  .addReg(ARM::SP);
1117  // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1118  // that refactoring is complete (eventually).
1119  if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1120  MIB.addReg(0);
1121        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1122      } else
1123  MIB.addImm(4);
1124  MIB.add(predOps(ARMCC::AL));
1125  }
1126  Regs.clear();
1127 
1128  // Put any subsequent vpop instructions after this one: they will refer to
1129  // higher register numbers so need to be popped afterwards.
1130  if (MI != MBB.end())
1131  ++MI;
1132  }
1133 }
1134 
1135 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1136 /// starting from d8. Also insert stack realignment code and leave the stack
1137 /// pointer pointing to the d8 spill slot.
1138 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1139                                     MachineBasicBlock::iterator MI,
1140                                     unsigned NumAlignedDPRCS2Regs,
1141  const std::vector<CalleeSavedInfo> &CSI,
1142  const TargetRegisterInfo *TRI) {
1143  MachineFunction &MF = *MBB.getParent();
1144  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1145  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1146  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1147  MachineFrameInfo &MFI = MF.getFrameInfo();
1148 
1149  // Mark the D-register spill slots as properly aligned. Since MFI computes
1150  // stack slot layout backwards, this can actually mean that the d-reg stack
1151  // slot offsets can be wrong. The offset for d8 will always be correct.
1152  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1153  unsigned DNum = CSI[i].getReg() - ARM::D8;
1154  if (DNum > NumAlignedDPRCS2Regs - 1)
1155  continue;
1156  int FI = CSI[i].getFrameIdx();
1157  // The even-numbered registers will be 16-byte aligned, the odd-numbered
1158  // registers will be 8-byte aligned.
1159  MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
1160 
1161  // The stack slot for D8 needs to be maximally aligned because this is
1162  // actually the point where we align the stack pointer. MachineFrameInfo
1163  // computes all offsets relative to the incoming stack pointer which is a
1164  // bit weird when realigning the stack. Any extra padding for this
1165  // over-alignment is not realized because the code inserted below adjusts
1166  // the stack pointer by numregs * 8 before aligning the stack pointer.
1167  if (DNum == 0)
1168  MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
1169  }
1170 
1171  // Move the stack pointer to the d8 spill slot, and align it at the same
1172  // time. Leave the stack slot address in the scratch register r4.
1173  //
1174  // sub r4, sp, #numregs * 8
1175  // bic r4, r4, #align - 1
1176  // mov sp, r4
1177  //
1178  bool isThumb = AFI->isThumbFunction();
1179  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1180  AFI->setShouldRestoreSPFromFP(true);
1181 
1182  // sub r4, sp, #numregs * 8
1183  // The immediate is <= 64, so it doesn't need any special encoding.
1184  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1185  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1186  .addReg(ARM::SP)
1187  .addImm(8 * NumAlignedDPRCS2Regs)
1188  .add(predOps(ARMCC::AL))
1189  .add(condCodeOp());
1190 
1191  unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
1192  // We must set parameter MustBeSingleInstruction to true, since
1193  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1194  // stack alignment. Luckily, this can always be done since all ARM
1195  // architecture versions that support Neon also support the BFC
1196  // instruction.
1197  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1198 
1199  // mov sp, r4
1200  // The stack pointer must be adjusted before spilling anything, otherwise
1201  // the stack slots could be clobbered by an interrupt handler.
1202  // Leave r4 live, it is used below.
1203  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1204  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1205  .addReg(ARM::R4)
1206  .add(predOps(ARMCC::AL));
1207  if (!isThumb)
1208  MIB.add(condCodeOp());
1209 
1210  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1211  // r4 holds the stack slot address.
1212  unsigned NextReg = ARM::D8;
1213 
1214  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1215  // The writeback is only needed when emitting two vst1.64 instructions.
1216  if (NumAlignedDPRCS2Regs >= 6) {
1217  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1218  &ARM::QQPRRegClass);
1219  MBB.addLiveIn(SupReg);
1220  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1221  .addReg(ARM::R4, RegState::Kill)
1222  .addImm(16)
1223  .addReg(NextReg)
1224  .addReg(SupReg, RegState::ImplicitKill)
1225  .add(predOps(ARMCC::AL));
1226  NextReg += 4;
1227  NumAlignedDPRCS2Regs -= 4;
1228  }
1229 
1230  // We won't modify r4 beyond this point. It currently points to the next
1231  // register to be spilled.
1232  unsigned R4BaseReg = NextReg;
1233 
1234  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1235  if (NumAlignedDPRCS2Regs >= 4) {
1236  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1237  &ARM::QQPRRegClass);
1238  MBB.addLiveIn(SupReg);
1239  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1240  .addReg(ARM::R4)
1241  .addImm(16)
1242  .addReg(NextReg)
1243  .addReg(SupReg, RegState::ImplicitKill)
1244  .add(predOps(ARMCC::AL));
1245  NextReg += 4;
1246  NumAlignedDPRCS2Regs -= 4;
1247  }
1248 
1249  // 16-byte aligned vst1.64 with 2 d-regs.
1250  if (NumAlignedDPRCS2Regs >= 2) {
1251  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1252  &ARM::QPRRegClass);
1253  MBB.addLiveIn(SupReg);
1254  BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1255  .addReg(ARM::R4)
1256  .addImm(16)
1257  .addReg(SupReg)
1258  .add(predOps(ARMCC::AL));
1259  NextReg += 2;
1260  NumAlignedDPRCS2Regs -= 2;
1261  }
1262 
1263  // Finally, use a vanilla vstr.64 for the odd last register.
1264  if (NumAlignedDPRCS2Regs) {
1265  MBB.addLiveIn(NextReg);
1266  // vstr.64 uses addrmode5 which has an offset scale of 4.
1267  BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1268  .addReg(NextReg)
1269  .addReg(ARM::R4)
1270  .addImm((NextReg - R4BaseReg) * 2)
1271  .add(predOps(ARMCC::AL));
1272  }
1273 
1274  // The last spill instruction inserted should kill the scratch register r4.
1275  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1276 }
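// Added illustrative note: for NumAlignedDPRCS2Regs == 7, the sequence built
// above is roughly
//   sub  r4, sp, #56
//   bfc  r4, #0, #log2(align)
//   mov  sp, r4
//   vst1.64 {d8-d11}, [r4]!      (address writeback)
//   vst1.64 {d12,d13}, [r4]
//   vstr d14, [r4, #16]
// with r4 killed by the final store.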
1277 
1278 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1279 /// iterator to the following instruction.
1280 static MachineBasicBlock::iterator
1281 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1282                         unsigned NumAlignedDPRCS2Regs) {
1283  // sub r4, sp, #numregs * 8
1284  // bic r4, r4, #align - 1
1285  // mov sp, r4
1286  ++MI; ++MI; ++MI;
1287  assert(MI->mayStore() && "Expecting spill instruction");
1288 
1289  // These switches all fall through.
1290  switch(NumAlignedDPRCS2Regs) {
1291  case 7:
1292  ++MI;
1293  assert(MI->mayStore() && "Expecting spill instruction");
1294    LLVM_FALLTHROUGH;
1295  default:
1296  ++MI;
1297  assert(MI->mayStore() && "Expecting spill instruction");
1298    LLVM_FALLTHROUGH;
1299  case 1:
1300  case 2:
1301  case 4:
1302  assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1303  ++MI;
1304  }
1305  return MI;
1306 }
1307 
1308 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1309 /// starting from d8. These instructions are assumed to execute while the
1310 /// stack is still aligned, unlike the code inserted by emitPopInst.
1311 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1312                                       MachineBasicBlock::iterator MI,
1313                                       unsigned NumAlignedDPRCS2Regs,
1314  const std::vector<CalleeSavedInfo> &CSI,
1315  const TargetRegisterInfo *TRI) {
1316  MachineFunction &MF = *MBB.getParent();
1317  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1318  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1319  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1320 
1321  // Find the frame index assigned to d8.
1322  int D8SpillFI = 0;
1323  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1324  if (CSI[i].getReg() == ARM::D8) {
1325  D8SpillFI = CSI[i].getFrameIdx();
1326  break;
1327  }
1328 
1329  // Materialize the address of the d8 spill slot into the scratch register r4.
1330  // This can be fairly complicated if the stack frame is large, so just use
1331  // the normal frame index elimination mechanism to do it. This code runs as
1332  // the initial part of the epilog where the stack and base pointers haven't
1333  // been changed yet.
1334  bool isThumb = AFI->isThumbFunction();
1335  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1336 
1337  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1338  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1339  .addFrameIndex(D8SpillFI)
1340  .addImm(0)
1341  .add(predOps(ARMCC::AL))
1342  .add(condCodeOp());
1343 
1344  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1345  unsigned NextReg = ARM::D8;
1346 
1347  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1348  if (NumAlignedDPRCS2Regs >= 6) {
1349  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1350  &ARM::QQPRRegClass);
1351  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1352  .addReg(ARM::R4, RegState::Define)
1353        .addReg(ARM::R4, RegState::Kill)
1354        .addImm(16)
1355        .addReg(SupReg, RegState::ImplicitDefine)
1356        .add(predOps(ARMCC::AL));
1357  NextReg += 4;
1358  NumAlignedDPRCS2Regs -= 4;
1359  }
1360 
1361  // We won't modify r4 beyond this point. It currently points to the next
1362  // register to be spilled.
1363  unsigned R4BaseReg = NextReg;
1364 
1365  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1366  if (NumAlignedDPRCS2Regs >= 4) {
1367  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1368  &ARM::QQPRRegClass);
1369  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1370  .addReg(ARM::R4)
1371  .addImm(16)
1372        .addReg(SupReg, RegState::ImplicitDefine)
1373        .add(predOps(ARMCC::AL));
1374  NextReg += 4;
1375  NumAlignedDPRCS2Regs -= 4;
1376  }
1377 
1378  // 16-byte aligned vld1.64 with 2 d-regs.
1379  if (NumAlignedDPRCS2Regs >= 2) {
1380  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1381  &ARM::QPRRegClass);
1382  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1383  .addReg(ARM::R4)
1384  .addImm(16)
1385  .add(predOps(ARMCC::AL));
1386  NextReg += 2;
1387  NumAlignedDPRCS2Regs -= 2;
1388  }
1389 
1390  // Finally, use a vanilla vldr.64 for the remaining odd register.
1391  if (NumAlignedDPRCS2Regs)
1392  BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1393  .addReg(ARM::R4)
1394  .addImm(2 * (NextReg - R4BaseReg))
1395  .add(predOps(ARMCC::AL));
1396 
1397  // The last reload inserted above kills the scratch register r4.
1398  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1399 }
1400 
1401 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
1402                                         MachineBasicBlock::iterator MI,
1403                                         const std::vector<CalleeSavedInfo> &CSI,
1404  const TargetRegisterInfo *TRI) const {
1405  if (CSI.empty())
1406  return false;
1407 
1408  MachineFunction &MF = *MBB.getParent();
1409  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1410 
1411  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1412  unsigned PushOneOpc = AFI->isThumbFunction() ?
1413  ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1414  unsigned FltOpc = ARM::VSTMDDB_UPD;
1415  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1416  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1417               MachineInstr::FrameSetup);
1418  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1419               MachineInstr::FrameSetup);
1420  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1421  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1422 
1423  // The code above does not insert spill code for the aligned DPRCS2 registers.
1424  // The stack realignment code will be inserted between the push instructions
1425  // and these spills.
1426  if (NumAlignedDPRCS2Regs)
1427  emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1428 
1429  return true;
1430 }
1431 
1432 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1433                                         MachineBasicBlock::iterator MI,
1434                                         std::vector<CalleeSavedInfo> &CSI,
1435  const TargetRegisterInfo *TRI) const {
1436  if (CSI.empty())
1437  return false;
1438 
1439  MachineFunction &MF = *MBB.getParent();
1440  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1441  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1442  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1443 
1444  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1445  // registers. Do that here instead.
1446  if (NumAlignedDPRCS2Regs)
1447  emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1448 
1449  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1450  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1451  unsigned FltOpc = ARM::VLDMDIA_UPD;
1452  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1453  NumAlignedDPRCS2Regs);
1454  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1455  &isARMArea2Register, 0);
1456  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1457  &isARMArea1Register, 0);
1458 
1459  return true;
1460 }
1461 
1462 // FIXME: Make generic?
1463 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
1464  const ARMBaseInstrInfo &TII) {
1465  unsigned FnSize = 0;
1466  for (auto &MBB : MF) {
1467  for (auto &MI : MBB)
1468  FnSize += TII.getInstSizeInBytes(MI);
1469  }
1470  return FnSize;
1471 }
1472 
1473 /// estimateRSStackSizeLimit - Look at each instruction that references stack
1474 /// frames and return the stack size limit beyond which some of these
1475 /// instructions will require a scratch register during their expansion later.
1476 // FIXME: Move to TII?
1477 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
1478                                          const TargetFrameLowering *TFI) {
1479  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1480  unsigned Limit = (1 << 12) - 1;
1481  for (auto &MBB : MF) {
1482  for (auto &MI : MBB) {
1483  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1484  if (!MI.getOperand(i).isFI())
1485  continue;
1486 
1487  // When using ADDri to get the address of a stack object, 255 is the
1488  // largest offset guaranteed to fit in the immediate offset.
1489  if (MI.getOpcode() == ARM::ADDri) {
1490  Limit = std::min(Limit, (1U << 8) - 1);
1491  break;
1492  }
1493 
1494  // Otherwise check the addressing mode.
1495  switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
1496  case ARMII::AddrMode3:
1497  case ARMII::AddrModeT2_i8:
1498  Limit = std::min(Limit, (1U << 8) - 1);
1499  break;
1500  case ARMII::AddrMode5:
1501      case ARMII::AddrModeT2_i8s4:
1502        Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1503  break;
1504  case ARMII::AddrModeT2_i12:
1505  // i12 supports only positive offset so these will be converted to
1506  // i8 opcodes. See llvm::rewriteT2FrameIndex.
1507  if (TFI->hasFP(MF) && AFI->hasStackFrame())
1508  Limit = std::min(Limit, (1U << 8) - 1);
1509  break;
1510  case ARMII::AddrMode4:
1511  case ARMII::AddrMode6:
1512  // Addressing modes 4 & 6 (load/store) instructions can't encode an
1513  // immediate offset for stack references.
1514  return 0;
1515  default:
1516  break;
1517  }
1518  break; // At most one FI per instruction
1519  }
1520  }
1521  }
1522 
1523  return Limit;
1524 }
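// Added illustrative note: a function that addresses a stack object through
// ADDri has its limit capped at 255, one using VLDRD/VSTRD (AddrMode5) at
// 1020, and any AddrMode4/AddrMode6 (ldm/stm or NEON) stack reference drops
// the limit to 0, so the caller must assume a scratch register may be needed
// for any frame offset.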
1525 
1526 // In functions that realign the stack, it can be an advantage to spill the
1527 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1528 // instructions take alignment hints that can improve performance.
1529 static void
1530 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
1531  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1532  if (!SpillAlignedNEONRegs)
1533  return;
1534 
1535  // Naked functions don't spill callee-saved registers.
1536  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
1537  return;
1538 
1539  // We are planning to use NEON instructions vst1 / vld1.
1540  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
1541  return;
1542 
1543  // Don't bother if the default stack alignment is sufficiently high.
1544  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
1545  return;
1546 
1547  // Aligned spills require stack realignment.
1548  if (!static_cast<const ARMBaseRegisterInfo *>(
1549  MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
1550  return;
1551 
1552  // We always spill contiguous d-registers starting from d8. Count how many
1553  // needs spilling. The register allocator will almost always use the
1554  // callee-saved registers in order, but it can happen that there are holes in
1555  // the range. Registers above the hole will be spilled to the standard DPRCS
1556  // area.
1557  unsigned NumSpills = 0;
1558  for (; NumSpills < 8; ++NumSpills)
1559  if (!SavedRegs.test(ARM::D8 + NumSpills))
1560  break;
1561 
1562  // Don't do this for just one d-register. It's not worth it.
1563  if (NumSpills < 2)
1564  return;
1565 
1566  // Spill the first NumSpills D-registers after realigning the stack.
1567  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1568 
1569  // A scratch register is required for the vst1 / vld1 instructions.
1570  SavedRegs.set(ARM::R4);
1571 }
1572 
1573 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
1574                                             BitVector &SavedRegs,
1575  RegScavenger *RS) const {
1576  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1577  // This tells PEI to spill the FP as if it is any other callee-save register
1578 // to take advantage of the eliminateFrameIndex machinery. This also ensures it
1579  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1580  // to combine multiple loads / stores.
1581  bool CanEliminateFrame = true;
1582  bool CS1Spilled = false;
1583  bool LRSpilled = false;
1584  unsigned NumGPRSpills = 0;
1585  unsigned NumFPRSpills = 0;
1586  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1587  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1588  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1589  MF.getSubtarget().getRegisterInfo());
1590  const ARMBaseInstrInfo &TII =
1591  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1592  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1593  MachineFrameInfo &MFI = MF.getFrameInfo();
1595  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1596  (void)TRI; // Silence unused warning in non-assert builds.
1597  unsigned FramePtr = RegInfo->getFrameRegister(MF);
1598 
1599  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
1600  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
1601  // since it's not always possible to restore sp from fp in a single
1602  // instruction.
1603  // FIXME: It will be better just to find spare register here.
1604  if (AFI->isThumb2Function() &&
1605  (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1606  SavedRegs.set(ARM::R4);
1607 
1608  if (AFI->isThumb1OnlyFunction()) {
1609  // Spill LR if Thumb1 function uses variable length argument lists.
1610  if (AFI->getArgRegsSaveSize() > 0)
1611  SavedRegs.set(ARM::LR);
1612 
1613  // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
1614  // requires stack alignment. We don't know for sure what the stack size
1615  // will be, but for this, an estimate is good enough. If anything
1616  // changes it, it'll be a spill, which implies we've used all the registers
1617  // and so R4 is already used, so not marking it here will be OK.
1618  // FIXME: It would be better just to find a spare register here.
1619  if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
1620  MFI.estimateStackSize(MF) > 508)
1621  SavedRegs.set(ARM::R4);
1622  }
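// (For context, likely the reason for the threshold above: 508 is the largest
// immediate a Thumb1 add/sub sp instruction can encode - a 7-bit value scaled
// by 4 - so frames estimated above that may need r4 as a scratch register to
// adjust or restore sp.)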
1623 
1624  // See if we can spill vector registers to aligned stack.
1625  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
1626 
1627  // Spill the BasePtr if it's used.
1628  if (RegInfo->hasBasePointer(MF))
1629  SavedRegs.set(RegInfo->getBaseRegister());
1630 
1631  // Don't spill FP if the frame can be eliminated. This is determined
1632  // by scanning the callee-save registers to see if any is modified.
1633  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1634  for (unsigned i = 0; CSRegs[i]; ++i) {
1635  unsigned Reg = CSRegs[i];
1636  bool Spilled = false;
1637  if (SavedRegs.test(Reg)) {
1638  Spilled = true;
1639  CanEliminateFrame = false;
1640  }
1641 
1642  if (!ARM::GPRRegClass.contains(Reg)) {
1643  if (Spilled) {
1644  if (ARM::SPRRegClass.contains(Reg))
1645  NumFPRSpills++;
1646  else if (ARM::DPRRegClass.contains(Reg))
1647  NumFPRSpills += 2;
1648  else if (ARM::QPRRegClass.contains(Reg))
1649  NumFPRSpills += 4;
1650  }
1651  continue;
1652  }
1653 
1654  if (Spilled) {
1655  NumGPRSpills++;
1656 
1657  if (!STI.splitFramePushPop(MF)) {
1658  if (Reg == ARM::LR)
1659  LRSpilled = true;
1660  CS1Spilled = true;
1661  continue;
1662  }
1663 
1664  // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
1665  switch (Reg) {
1666  case ARM::LR:
1667  LRSpilled = true;
1668  LLVM_FALLTHROUGH;
1669  case ARM::R0: case ARM::R1:
1670  case ARM::R2: case ARM::R3:
1671  case ARM::R4: case ARM::R5:
1672  case ARM::R6: case ARM::R7:
1673  CS1Spilled = true;
1674  break;
1675  default:
1676  break;
1677  }
1678  } else {
1679  if (!STI.splitFramePushPop(MF)) {
1680  UnspilledCS1GPRs.push_back(Reg);
1681  continue;
1682  }
1683 
1684  switch (Reg) {
1685  case ARM::R0: case ARM::R1:
1686  case ARM::R2: case ARM::R3:
1687  case ARM::R4: case ARM::R5:
1688  case ARM::R6: case ARM::R7:
1689  case ARM::LR:
1690  UnspilledCS1GPRs.push_back(Reg);
1691  break;
1692  default:
1693  UnspilledCS2GPRs.push_back(Reg);
1694  break;
1695  }
1696  }
1697  }
1698 
1699  bool ForceLRSpill = false;
1700  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1701  unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
1702  // Force LR to be spilled if the Thumb function size is >= 2048 bytes. This enables
1703  // use of BL to implement far jump. If it turns out that it's not needed
1704  // then the branch fix up path will undo it.
1705  if (FnSize >= (1 << 11)) {
1706  CanEliminateFrame = false;
1707  ForceLRSpill = true;
1708  }
1709  }
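// (As an approximation: the Thumb1 unconditional branch has a range of roughly
// +/-2KB, so once the function body reaches 2048 bytes a far jump may have to
// be synthesized with BL, which clobbers LR - hence the forced spill above.)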
1710 
1711  // If any of the stack slot references may be out of range of an immediate
1712  // offset, make sure a register (or a spill slot) is available for the
1713  // register scavenger. Note that if we're indexing off the frame pointer, the
1714  // effective stack size is 4 bytes larger since the FP points to the stack
1715  // slot of the previous FP. Also, if we have variable sized objects in the
1716  // function, stack slot references will often be negative, and some of
1717  // our instructions are positive-offset only, so conservatively consider
1718  // that case to want a spill slot (or register) as well. Similarly, if
1719  // the function adjusts the stack pointer during execution and the
1720  // adjustments aren't already part of our stack size estimate, our offset
1721  // calculations may be off, so be conservative.
1722  // FIXME: We could add logic to be more precise about negative offsets
1723  // and which instructions will need a scratch register for them. Is it
1724  // worth the effort and added fragility?
1725  unsigned EstimatedStackSize =
1726  MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
1727 
1728  // Determine biggest (positive) SP offset in MachineFrameInfo.
1729  int MaxFixedOffset = 0;
1730  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
1731  int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
1732  MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
1733  }
1734 
1735  bool HasFP = hasFP(MF);
1736  if (HasFP) {
1737  if (AFI->hasStackFrame())
1738  EstimatedStackSize += 4;
1739  } else {
1740  // If FP is not used, SP will be used to access arguments, so count the
1741  // size of arguments into the estimation.
1742  EstimatedStackSize += MaxFixedOffset;
1743  }
1744  EstimatedStackSize += 16; // For possible paddings.
1745 
1746  unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
1747  int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI);
1748  bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
1749  MFI.hasVarSizedObjects() ||
1750  (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
1751  // For large argument stacks, fp-relative addressing may overflow.
1752  (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
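// As a rough illustration (the numbers are illustrative, not normative): a
// Thumb2 function whose estimated frame is 600 bytes, and whose most
// restrictive stack-referencing instruction only accepts offsets up to 255,
// sets BigFrameOffsets here and therefore reserves an emergency scavenging
// slot or an extra callee-saved register further down.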
1753  if (BigFrameOffsets ||
1754  !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1755  AFI->setHasStackFrame(true);
1756 
1757  if (HasFP) {
1758  SavedRegs.set(FramePtr);
1759  // If the frame pointer is required by the ABI, also spill LR so that we
1760  // emit a complete frame record.
1761  if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
1762  SavedRegs.set(ARM::LR);
1763  LRSpilled = true;
1764  NumGPRSpills++;
1765  auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
1766  if (LRPos != UnspilledCS1GPRs.end())
1767  UnspilledCS1GPRs.erase(LRPos);
1768  }
1769  auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
1770  if (FPPos != UnspilledCS1GPRs.end())
1771  UnspilledCS1GPRs.erase(FPPos);
1772  NumGPRSpills++;
1773  if (FramePtr == ARM::R7)
1774  CS1Spilled = true;
1775  }
1776 
1777  // This is true when we inserted a spill for an unused register that can now
1778  // be used for register scavenging.
1779  bool ExtraCSSpill = false;
1780 
1781  if (AFI->isThumb1OnlyFunction()) {
1782  // For Thumb1-only targets, we need some low registers when we save and
1783  // restore the high registers (which aren't allocatable, but could be
1784  // used by inline assembly) because the push/pop instructions cannot
1785  // access high registers. If necessary, we might need to push more low
1786  // registers to ensure that there is at least one free that can be used
1787  // for the saving & restoring, and preferably we should ensure that as
1788  // many as are needed are available so that fewer push/pop instructions
1789  // are required.
1790 
1791  // Low registers which are not currently pushed, but could be (r4-r7).
1792  SmallVector<unsigned, 4> AvailableRegs;
1793 
1794  // Unused argument registers (r0-r3) can be clobbered in the prologue for
1795  // free.
1796  int EntryRegDeficit = 0;
1797  for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
1798  if (!MF.getRegInfo().isLiveIn(Reg)) {
1799  --EntryRegDeficit;
1800  DEBUG(dbgs() << PrintReg(Reg, TRI)
1801  << " is unused argument register, EntryRegDeficit = "
1802  << EntryRegDeficit << "\n");
1803  }
1804  }
1805 
1806  // Unused return registers can be clobbered in the epilogue for free.
1807  int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
1808  DEBUG(dbgs() << AFI->getReturnRegsCount()
1809  << " return regs used, ExitRegDeficit = " << ExitRegDeficit
1810  << "\n");
1811 
1812  int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
1813  DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
1814 
1815  // r4-r6 can be used in the prologue if they are pushed by the first push
1816  // instruction.
1817  for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
1818  if (SavedRegs.test(Reg)) {
1819  --RegDeficit;
1820  DEBUG(dbgs() << PrintReg(Reg, TRI)
1821  << " is saved low register, RegDeficit = " << RegDeficit
1822  << "\n");
1823  } else {
1824  AvailableRegs.push_back(Reg);
1825  DEBUG(dbgs()
1826  << PrintReg(Reg, TRI)
1827  << " is non-saved low register, adding to AvailableRegs\n");
1828  }
1829  }
1830 
1831  // r7 can be used if it is not being used as the frame pointer.
1832  if (!HasFP) {
1833  if (SavedRegs.test(ARM::R7)) {
1834  --RegDeficit;
1835  DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = "
1836  << RegDeficit << "\n");
1837  } else {
1838  AvailableRegs.push_back(ARM::R7);
1839  DEBUG(dbgs()
1840  << "%R7 is non-saved low register, adding to AvailableRegs\n");
1841  }
1842  }
1843 
1844  // Each of r8-r11 needs to be copied to a low register, then pushed.
1845  for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
1846  if (SavedRegs.test(Reg)) {
1847  ++RegDeficit;
1848  DEBUG(dbgs() << PrintReg(Reg, TRI)
1849  << " is saved high register, RegDeficit = " << RegDeficit
1850  << "\n");
1851  }
1852  }
1853 
1854  // LR can only be used by PUSH, not POP, and can't be used at all if the
1855  // llvm.returnaddress intrinsic is used. This is only worth doing if we
1856  // are more limited at function entry than exit.
1857  if ((EntryRegDeficit > ExitRegDeficit) &&
1858  !(MF.getRegInfo().isLiveIn(ARM::LR) &&
1859  MF.getFrameInfo().isReturnAddressTaken())) {
1860  if (SavedRegs.test(ARM::LR)) {
1861  --RegDeficit;
1862  DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit
1863  << "\n");
1864  } else {
1865  AvailableRegs.push_back(ARM::LR);
1866  DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n");
1867  }
1868  }
1869 
1870  // If there are more high registers that need pushing than low registers
1871  // available, push some more low registers so that we can use fewer push
1872  // instructions. This might not reduce RegDeficit all the way to zero,
1873  // because we can only guarantee that r4-r6 are available, but r8-r11 may
1874  // need saving.
1875  DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
1876  for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
1877  unsigned Reg = AvailableRegs.pop_back_val();
1878  DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
1879  << " to make up reg deficit\n");
1880  SavedRegs.set(Reg);
1881  NumGPRSpills++;
1882  CS1Spilled = true;
1883  assert(!MRI.isReserved(Reg) && "Should not be reserved");
1884  if (!MRI.isPhysRegUsed(Reg))
1885  ExtraCSSpill = true;
1886  UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
1887  if (Reg == ARM::LR)
1888  LRSpilled = true;
1889  }
1890  DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit << "\n");
1891  }
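// Worked example (illustrative, assuming LR is not otherwise involved): a
// Thumb1 function that takes arguments in r0-r3 (EntryRegDeficit = 0), returns
// one value (ExitRegDeficit = 1 - 4 = -3) and must save r8 and r9 ends up with
// RegDeficit = 2, so up to two extra registers from AvailableRegs are pushed
// above, letting the high registers be copied through low registers with
// fewer push/pop instructions.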
1892 
1893  // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
1894  // spill LR as well so we can fold BX_RET into the register restore (LDM).
1895  if (!LRSpilled && CS1Spilled) {
1896  SavedRegs.set(ARM::LR);
1897  NumGPRSpills++;
1898  SmallVectorImpl<unsigned>::iterator LRPos;
1899  LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
1900  if (LRPos != UnspilledCS1GPRs.end())
1901  UnspilledCS1GPRs.erase(LRPos);
1902 
1903  ForceLRSpill = false;
1904  if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR))
1905  ExtraCSSpill = true;
1906  }
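// The payoff of spilling LR here (illustrative): with r4-r7 already saved,
// the epilogue can end in a single "pop {r4-r7, pc}" instead of
// "pop {r4-r7}" followed by "bx lr".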
1907 
1908  // If stack and double are 8-byte aligned and we are spilling an odd number
1909  // of GPRs, spill one extra callee save GPR so we won't have to pad between
1910  // the integer and double callee save areas.
1911  DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
1912  unsigned TargetAlign = getStackAlignment();
1913  if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
1914  if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
1915  for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
1916  unsigned Reg = UnspilledCS1GPRs[i];
1917  // Don't spill a high register if the function is Thumb. In the case of
1918  // Windows on ARM, accept R11 (frame pointer)
1919  if (!AFI->isThumbFunction() ||
1920  (STI.isTargetWindows() && Reg == ARM::R11) ||
1921  isARMLowRegister(Reg) || Reg == ARM::LR) {
1922  SavedRegs.set(Reg);
1923  DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
1924  << " to make up alignment\n");
1925  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1926  ExtraCSSpill = true;
1927  break;
1928  }
1929  }
1930  } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
1931  unsigned Reg = UnspilledCS2GPRs.front();
1932  SavedRegs.set(Reg);
1933  DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
1934  << " to make up alignment\n");
1935  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1936  ExtraCSSpill = true;
1937  }
1938  }
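// For example (illustrative): with an 8-byte stack alignment and r4-r6
// spilled (three GPRs, 12 bytes), one more callee-saved GPR is added above so
// the GPR save area becomes 16 bytes and the d-register save area that
// follows stays 8-byte aligned without a padding word.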
1939 
1940  // Estimate if we might need to scavenge a register at some point in order
1941  // to materialize a stack offset. If so, either spill one additional
1942  // callee-saved register or reserve a special spill slot to facilitate
1943  // register scavenging. Thumb1 needs a spill slot for stack pointer
1944  // adjustments also, even when the frame itself is small.
1945  if (BigFrameOffsets && !ExtraCSSpill) {
1946  // If any non-reserved CS register isn't spilled, just spill one or two
1947  // extra. That should take care of it!
1948  unsigned NumExtras = TargetAlign / 4;
1949  SmallVector<unsigned, 2> Extras;
1950  while (NumExtras && !UnspilledCS1GPRs.empty()) {
1951  unsigned Reg = UnspilledCS1GPRs.back();
1952  UnspilledCS1GPRs.pop_back();
1953  if (!MRI.isReserved(Reg) &&
1954  (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
1955  Reg == ARM::LR)) {
1956  Extras.push_back(Reg);
1957  NumExtras--;
1958  }
1959  }
1960  // For non-Thumb1 functions, also check for hi-reg CS registers
1961  if (!AFI->isThumb1OnlyFunction()) {
1962  while (NumExtras && !UnspilledCS2GPRs.empty()) {
1963  unsigned Reg = UnspilledCS2GPRs.back();
1964  UnspilledCS2GPRs.pop_back();
1965  if (!MRI.isReserved(Reg)) {
1966  Extras.push_back(Reg);
1967  NumExtras--;
1968  }
1969  }
1970  }
1971  if (NumExtras == 0) {
1972  for (unsigned Reg : Extras) {
1973  SavedRegs.set(Reg);
1974  if (!MRI.isPhysRegUsed(Reg))
1975  ExtraCSSpill = true;
1976  }
1977  }
1978  if (!ExtraCSSpill && !AFI->isThumb1OnlyFunction()) {
1979  // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
1980  // closest to SP or frame pointer.
1981  assert(RS && "Register scavenging not provided");
1982  const TargetRegisterClass &RC = ARM::GPRRegClass;
1983  unsigned Size = TRI->getSpillSize(RC);
1984  unsigned Align = TRI->getSpillAlignment(RC);
1985  RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
1986  }
1987  }
1988  }
1989 
1990  if (ForceLRSpill) {
1991  SavedRegs.set(ARM::LR);
1992  AFI->setLRIsSpilledForFarJump(true);
1993  }
1994 }
1995 
1996 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
1997  MachineFunction &MF, MachineBasicBlock &MBB,
1998  MachineBasicBlock::iterator I) const {
1999  const ARMBaseInstrInfo &TII =
2000  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2001  if (!hasReservedCallFrame(MF)) {
2002  // If we have alloca, convert as follows:
2003  // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2004  // ADJCALLSTACKUP -> add, sp, sp, amount
2005  MachineInstr &Old = *I;
2006  DebugLoc dl = Old.getDebugLoc();
2007  unsigned Amount = TII.getFrameSize(Old);
2008  if (Amount != 0) {
2009  // We need to keep the stack aligned properly. To do this, we round the
2010  // amount of space needed for the outgoing arguments up to the next
2011  // alignment boundary.
2012  Amount = alignSPAdjust(Amount);
2013 
2014  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2015  assert(!AFI->isThumb1OnlyFunction() &&
2016  "This eliminateCallFramePseudoInstr does not support Thumb1!");
2017  bool isARM = !AFI->isThumbFunction();
2018 
2019  // Replace the pseudo instruction with a new instruction...
2020  unsigned Opc = Old.getOpcode();
2021  int PIdx = Old.findFirstPredOperandIdx();
2022  ARMCC::CondCodes Pred =
2023  (PIdx == -1) ? ARMCC::AL
2024  : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
2025  unsigned PredReg = TII.getFramePred(Old);
2026  if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2027  emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2028  Pred, PredReg);
2029  } else {
2030  assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2031  emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2032  Pred, PredReg);
2033  }
2034  }
2035  }
2036  return MBB.erase(I);
2037 }
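// As an illustration (assuming a non-Thumb1 function without a reserved call
// frame and an 8-byte stack alignment): a call site needing 6 bytes of
// outgoing arguments is bracketed by ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos;
// alignSPAdjust rounds 6 up to 8, so the pseudos above are rewritten to
// "sub sp, sp, #8" before the call and "add sp, sp, #8" after it.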
2038 
2039 /// Get the minimum constant for ARM that is greater than or equal to the
2040 /// argument. In ARM, constants can have any value that can be produced by
2041 /// rotating an 8-bit value to the right by an even number of bits within a
2042 /// 32-bit word.
2043 static uint32_t alignToARMConstant(uint32_t Value) {
2044  unsigned Shifted = 0;
2045 
2046  if (Value == 0)
2047  return 0;
2048 
2049  while (!(Value & 0xC0000000)) {
2050  Value = Value << 2;
2051  Shifted += 2;
2052  }
2053 
2054  bool Carry = (Value & 0x00FFFFFF);
2055  Value = ((Value & 0xFF000000) >> 24) + Carry;
2056 
2057  if (Value & 0x0000100)
2058  Value = Value & 0x000001FC;
2059 
2060  if (Shifted > 24)
2061  Value = Value >> (Shifted - 24);
2062  else
2063  Value = Value << (24 - Shifted);
2064 
2065  return Value;
2066 }
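// Worked examples of the rounding above (traced through the algorithm, shown
// for illustration):
//   alignToARMConstant(257)    == 260     // 0x104 = 0x41 rotated; 258/259 not encodable
//   alignToARMConstant(0x1234) == 0x1240  // 0x49 rotated into place
// Each result is the smallest ARM modified-immediate (an 8-bit value rotated
// right by an even amount) that is >= the input.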
2067 
2068 // The stack limit in the TCB is set to this many bytes above the actual
2069 // stack limit.
2070 static const uint64_t kSplitStackAvailable = 256;
2071 
2072 // Adjust the function prologue to enable split stacks. This currently only
2073 // supports android and linux.
2074 //
2075 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2076 // must be well defined in order to allow for consistent implementations of the
2077 // __morestack helper function. The ABI is also not a normal ABI in that it
2078 // doesn't follow the normal calling conventions because this allows the
2079 // prologue of each function to be optimized further.
2080 //
2081 // Currently, the ABI looks like (when calling __morestack)
2082 //
2083 // * r4 holds the minimum stack size requested for this function call
2084 // * r5 holds the stack size of the arguments to the function
2085 // * the beginning of the function is 3 instructions after the call to
2086 // __morestack
2087 //
2088 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2089 // place the arguments on to the new stack, and the 3-instruction knowledge to
2090 // jump directly to the body of the function when working on the new stack.
2091 //
2092 // An old (and possibly no longer compatible) implementation of __morestack for
2093 // ARM can be found at [1].
2094 //
2095 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
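// As a sketch (not emitted verbatim; ARM mode, frame larger than 256 bytes),
// the code below produces roughly:
//   push {r4, r5}
//   sub  r5, sp, #FrameSize        ; r5 = sp - requested stack
//   mrc  p15, #0, r4, c13, c0, #3  ; r4 = TLS base
//   ldr  r4, [r4, #4*TlsOffset]    ; r4 = current stack limit
//   cmp  r4, r5
//   blo  .Lenough                  ; limit below sp - FrameSize: no new stack
//   mov  r4, #FrameSize            ; argument 1 for __morestack
//   mov  r5, #ArgStackSize         ; argument 2 for __morestack
//   push {lr}
//   bl   __morestack
//   pop  {lr}
//   pop  {r4, r5}
//   bx   lr                        ; __morestack resumes in the function body
// .Lenough:
//   pop  {r4, r5}
//   ; ordinary prologue follows
// (FrameSize and ArgStackSize are placeholders for the aligned sizes computed
// below.)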
2096 void ARMFrameLowering::adjustForSegmentedStacks(
2097  MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2098  unsigned Opcode;
2099  unsigned CFIIndex;
2100  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2101  bool Thumb = ST->isThumb();
2102 
2103  // Sadly, this currently doesn't support varargs or platforms other than
2104  // android/linux. Note that Thumb1/Thumb2 are supported on android/linux.
2105  if (MF.getFunction()->isVarArg())
2106  report_fatal_error("Segmented stacks do not support vararg functions.");
2107  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2108  report_fatal_error("Segmented stacks not supported on this platform.");
2109 
2110  MachineFrameInfo &MFI = MF.getFrameInfo();
2111  MachineModuleInfo &MMI = MF.getMMI();
2112  MCContext &Context = MMI.getContext();
2113  const MCRegisterInfo *MRI = Context.getRegisterInfo();
2114  const ARMBaseInstrInfo &TII =
2115  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2116  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
2117  DebugLoc DL;
2118 
2119  uint64_t StackSize = MFI.getStackSize();
2120 
2121  // Do not generate a prologue for functions with a stack of size zero
2122  if (StackSize == 0)
2123  return;
2124 
2125  // Use R4 and R5 as scratch registers.
2126  // We save R4 and R5 before use and restore them before leaving the function.
2127  unsigned ScratchReg0 = ARM::R4;
2128  unsigned ScratchReg1 = ARM::R5;
2129  uint64_t AlignedStackSize;
2130 
2131  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2132  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2133  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
2134  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
2135  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
2136 
2137  // Grab everything that reaches PrologueMBB to update their liveness as well.
2138  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2139  SmallVector<MachineBasicBlock *, 8> WalkList;
2140  WalkList.push_back(&PrologueMBB);
2141 
2142  do {
2143  MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2144  for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2145  if (BeforePrologueRegion.insert(PredBB).second)
2146  WalkList.push_back(PredBB);
2147  }
2148  } while (!WalkList.empty());
2149 
2150  // The order in that list is important.
2151  // The blocks will all be inserted before PrologueMBB using that order.
2152  // Therefore the block that should appear first in the CFG should appear
2153  // first in the list.
2154  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2155  PostStackMBB};
2156 
2157  for (MachineBasicBlock *B : AddedBlocks)
2158  BeforePrologueRegion.insert(B);
2159 
2160  for (const auto &LI : PrologueMBB.liveins()) {
2161  for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2162  PredBB->addLiveIn(LI);
2163  }
2164 
2165  // Remove the newly added blocks from the list, since we know
2166  // we do not have to do the following updates for them.
2167  for (MachineBasicBlock *B : AddedBlocks) {
2168  BeforePrologueRegion.erase(B);
2169  MF.insert(PrologueMBB.getIterator(), B);
2170  }
2171 
2172  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2173  // Make sure the LiveIns are still sorted and unique.
2174  MBB->sortUniqueLiveIns();
2175  // Replace the edges to PrologueMBB by edges to the sequences
2176  // we are about to add.
2177  MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2178  }
2179 
2180  // The required stack size that is aligned to ARM constant criterion.
2181  AlignedStackSize = alignToARMConstant(StackSize);
2182 
2183  // When the frame size is less than 256 we just compare the stack
2184  // boundary directly to the value of the stack pointer, per gcc.
2185  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
2186 
2187  // We will use two of the callee save registers as scratch registers so we
2188  // need to save those registers onto the stack.
2189  // We will use SR0 to hold stack limit and SR1 to hold the stack size
2190  // requested and arguments for __morestack().
2191  // SR0: Scratch Register #0
2192  // SR1: Scratch Register #1
2193  // push {SR0, SR1}
2194  if (Thumb) {
2195  BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
2196  .add(predOps(ARMCC::AL))
2197  .addReg(ScratchReg0)
2198  .addReg(ScratchReg1);
2199  } else {
2200  BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
2201  .addReg(ARM::SP, RegState::Define)
2202  .addReg(ARM::SP)
2203  .add(predOps(ARMCC::AL))
2204  .addReg(ScratchReg0)
2205  .addReg(ScratchReg1);
2206  }
2207 
2208  // Emit the relevant DWARF information about the change in stack pointer as
2209  // well as where to find both r4 and r5 (the callee-save registers)
2210  CFIIndex =
2211  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
2212  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2213  .addCFIIndex(CFIIndex);
2214  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2215  nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
2216  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2217  .addCFIIndex(CFIIndex);
2218  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2219  nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
2220  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2221  .addCFIIndex(CFIIndex);
2222 
2223  // mov SR1, sp
2224  if (Thumb) {
2225  BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
2226  .addReg(ARM::SP)
2227  .add(predOps(ARMCC::AL));
2228  } else if (CompareStackPointer) {
2229  BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
2230  .addReg(ARM::SP)
2231  .add(predOps(ARMCC::AL))
2232  .add(condCodeOp());
2233  }
2234 
2235  // sub SR1, sp, #StackSize
2236  if (!CompareStackPointer && Thumb) {
2237  BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
2238  .add(condCodeOp())
2239  .addReg(ScratchReg1)
2240  .addImm(AlignedStackSize)
2241  .add(predOps(ARMCC::AL));
2242  } else if (!CompareStackPointer) {
2243  BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
2244  .addReg(ARM::SP)
2245  .addImm(AlignedStackSize)
2246  .add(predOps(ARMCC::AL))
2247  .add(condCodeOp());
2248  }
2249 
2250  if (Thumb && ST->isThumb1Only()) {
2251  unsigned PCLabelId = ARMFI->createPICLabelUId();
2252  ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
2253  MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
2254  MachineConstantPool *MCP = MF.getConstantPool();
2255  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
2256 
2257  // ldr SR0, [pc, offset(STACK_LIMIT)]
2258  BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
2259  .addConstantPoolIndex(CPI)
2260  .add(predOps(ARMCC::AL));
2261 
2262  // ldr SR0, [SR0]
2263  BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
2264  .addReg(ScratchReg0)
2265  .addImm(0)
2266  .add(predOps(ARMCC::AL));
2267  } else {
2268  // Get TLS base address from the coprocessor
2269  // mrc p15, #0, SR0, c13, c0, #3
2270  BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
2271  .addImm(15)
2272  .addImm(0)
2273  .addImm(13)
2274  .addImm(0)
2275  .addImm(3)
2276  .add(predOps(ARMCC::AL));
2277 
2278  // Use the last tls slot on android and a private field of the TCB on linux.
2279  assert(ST->isTargetAndroid() || ST->isTargetLinux());
2280  unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
2281 
2282  // Get the stack limit from the right offset
2283  // ldr SR0, [sr0, #4 * TlsOffset]
2284  BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
2285  .addReg(ScratchReg0)
2286  .addImm(4 * TlsOffset)
2287  .add(predOps(ARMCC::AL));
2288  }
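// (With the offsets used above, the limit is read from thread-pointer + 252
// on android - the last TLS slot - and thread-pointer + 4 on linux; both are
// simply 4 * TlsOffset.)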
2289 
2290  // Compare stack limit with stack size requested.
2291  // cmp SR0, SR1
2292  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
2293  BuildMI(GetMBB, DL, TII.get(Opcode))
2294  .addReg(ScratchReg0)
2295  .addReg(ScratchReg1)
2296  .add(predOps(ARMCC::AL));
2297 
2298  // This jump is taken if StackLimit < SP - stack required, i.e. there is still enough stack.
2299  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
2300  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
2301  .addImm(ARMCC::LO)
2302  .addReg(ARM::CPSR);
2303 
2304 
2305  // Calling __morestack(StackSize, Size of stack arguments).
2306  // __morestack knows that the stack size requested is in SR0(r4)
2307  // and the size of the stack arguments is in SR1(r5).
2308 
2309  // Pass the first argument to __morestack in Scratch Register #0:
2310  // the amount of stack required.
2311  if (Thumb) {
2312  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
2313  .add(condCodeOp())
2314  .addImm(AlignedStackSize)
2315  .add(predOps(ARMCC::AL));
2316  } else {
2317  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
2318  .addImm(AlignedStackSize)
2319  .add(predOps(ARMCC::AL))
2320  .add(condCodeOp());
2321  }
2322  // Pass the second argument to __morestack in Scratch Register #1:
2323  // the amount of stack used to pass the function arguments.
2324  if (Thumb) {
2325  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
2326  .add(condCodeOp())
2327  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2328  .add(predOps(ARMCC::AL));
2329  } else {
2330  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
2331  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2332  .add(predOps(ARMCC::AL))
2333  .add(condCodeOp());
2334  }
2335 
2336  // push {lr} - Save return address of this function.
2337  if (Thumb) {
2338  BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
2339  .add(predOps(ARMCC::AL))
2340  .addReg(ARM::LR);
2341  } else {
2342  BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
2343  .addReg(ARM::SP, RegState::Define)
2344  .addReg(ARM::SP)
2345  .add(predOps(ARMCC::AL))
2346  .addReg(ARM::LR);
2347  }
2348 
2349  // Emit the DWARF info about the change in stack as well as where to find the
2350  // previous link register
2351  CFIIndex =
2352  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
2353  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2354  .addCFIIndex(CFIIndex);
2355  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2356  nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
2357  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2358  .addCFIIndex(CFIIndex);
2359 
2360  // Call __morestack().
2361  if (Thumb) {
2362  BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
2363  .add(predOps(ARMCC::AL))
2364  .addExternalSymbol("__morestack");
2365  } else {
2366  BuildMI(AllocMBB, DL, TII.get(ARM::BL))
2367  .addExternalSymbol("__morestack");
2368  }
2369 
2370  // pop {lr} - Restore return address of this original function.
2371  if (Thumb) {
2372  if (ST->isThumb1Only()) {
2373  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2374  .add(predOps(ARMCC::AL))
2375  .addReg(ScratchReg0);
2376  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
2377  .addReg(ScratchReg0)
2378  .add(predOps(ARMCC::AL));
2379  } else {
2380  BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
2381  .addReg(ARM::LR, RegState::Define)
2382  .addReg(ARM::SP, RegState::Define)
2383  .addReg(ARM::SP)
2384  .addImm(4)
2385  .add(predOps(ARMCC::AL));
2386  }
2387  } else {
2388  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2389  .addReg(ARM::SP, RegState::Define)
2390  .addReg(ARM::SP)
2391  .add(predOps(ARMCC::AL))
2392  .addReg(ARM::LR);
2393  }
2394 
2395  // Restore SR0 and SR1 in case __morestack() was called.
2396  // __morestack() will skip the PostStackMBB block, so we need to restore
2397  // the scratch registers here.
2398  // pop {SR0, SR1}
2399  if (Thumb) {
2400  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2401  .add(predOps(ARMCC::AL))
2402  .addReg(ScratchReg0)
2403  .addReg(ScratchReg1);
2404  } else {
2405  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2406  .addReg(ARM::SP, RegState::Define)
2407  .addReg(ARM::SP)
2408  .add(predOps(ARMCC::AL))
2409  .addReg(ScratchReg0)
2410  .addReg(ScratchReg1);
2411  }
2412 
2413  // Update the CFA offset now that we've popped
2414  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2415  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2416  .addCFIIndex(CFIIndex);
2417 
2418  // Return from this function.
2419  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
2420 
2421  // Restore SR0 and SR1 in case __morestack() was not called.
2422  // pop {SR0, SR1}
2423  if (Thumb) {
2424  BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
2425  .add(predOps(ARMCC::AL))
2426  .addReg(ScratchReg0)
2427  .addReg(ScratchReg1);
2428  } else {
2429  BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
2430  .addReg(ARM::SP, RegState::Define)
2431  .addReg(ARM::SP)
2432  .add(predOps(ARMCC::AL))
2433  .addReg(ScratchReg0)
2434  .addReg(ScratchReg1);
2435  }
2436 
2437  // Update the CFA offset now that we've popped
2438  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2439  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2440  .addCFIIndex(CFIIndex);
2441 
2442  // Tell debuggers that r4 and r5 are now the same as they were in the
2443  // previous function, that they're the "Same Value".
2444  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2445  nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
2446  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2447  .addCFIIndex(CFIIndex);
2448  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2449  nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
2450  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2451  .addCFIIndex(CFIIndex);
2452 
2453  // Organize the MBB successor lists.
2454  PostStackMBB->addSuccessor(&PrologueMBB);
2455 
2456  AllocMBB->addSuccessor(PostStackMBB);
2457 
2458  GetMBB->addSuccessor(PostStackMBB);
2459  GetMBB->addSuccessor(AllocMBB);
2460 
2461  McrMBB->addSuccessor(GetMBB);
2462 
2463  PrevStackMBB->addSuccessor(McrMBB);
2464 
2465 #ifdef EXPENSIVE_CHECKS
2466  MF.verify();
2467 #endif
2468 }