1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the ARM implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMFrameLowering.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMConstantPoolValue.h"
17 #include "ARMMachineFunctionInfo.h"
18 #include "ARMSubtarget.h"
19 #include "MCTargetDesc/ARMAddressingModes.h"
20 #include "MCTargetDesc/ARMBaseInfo.h"
21 #include "Utils/ARMBaseInfo.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/MC/MCContext.h"
46 #include "llvm/MC/MCDwarf.h"
47 #include "llvm/MC/MCInstrDesc.h"
48 #include "llvm/MC/MCRegisterInfo.h"
49 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstddef>
61 #include <cstdint>
62 #include <iterator>
63 #include <utility>
64 #include <vector>
65 
66 #define DEBUG_TYPE "arm-frame-lowering"
67 
68 using namespace llvm;
69 
70 static cl::opt<bool>
71 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
72  cl::desc("Align ARM NEON spills in prolog and epilog"));
73 
74 static MachineBasicBlock::iterator
75 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
76  unsigned NumAlignedDPRCS2Regs);
77 
78 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
79  : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
80  STI(sti) {}
81 
82 bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
83  // iOS always has a FP for backtracking, force other targets to keep their FP
84  // when doing FastISel. The emitted code is currently superior, and in cases
85  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
86  return MF.getSubtarget<ARMSubtarget>().useFastISel();
87 }
88 
89 /// Returns true if the target can safely skip saving callee-saved registers
90 /// for noreturn nounwind functions.
91 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
92  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
93  MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
94  !MF.getFunction().hasFnAttribute(Attribute::UWTable));
95 
96  // Frame pointer and link register are not treated as normal CSR, thus we
97  // can always skip CSR saves for nonreturning functions.
98  return true;
99 }
100 
101 /// hasFP - Return true if the specified function should have a dedicated frame
102 /// pointer register. This is true if the function has variable sized allocas
103 /// or if frame pointer elimination is disabled.
104 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
105  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
106  const MachineFrameInfo &MFI = MF.getFrameInfo();
107 
108  // ABI-required frame pointer.
109  if (MF.getTarget().Options.DisableFramePointerElim(MF))
110  return true;
111 
112  // Frame pointer required for use within this function.
113  return (RegInfo->needsStackRealignment(MF) ||
114  MFI.hasVarSizedObjects() ||
115  MFI.isFrameAddressTaken());
116 }
117 
118 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
119 /// not required, we reserve argument space for call sites in the function
120 /// immediately on entry to the current function. This eliminates the need for
121 /// add/sub sp brackets around call sites. Returns true if the call frame is
122 /// included as part of the stack frame.
123 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
124  const MachineFrameInfo &MFI = MF.getFrameInfo();
125  unsigned CFSize = MFI.getMaxCallFrameSize();
126  // It's not always a good idea to include the call frame as part of the
127  // stack frame. ARM (especially Thumb) has only small immediate offsets for
128  // addressing the stack frame, so a large call frame can cause poor codegen
129  // and may even make it impossible to scavenge a register.
130  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
131  return false;
132 
133  return !MFI.hasVarSizedObjects();
134 }
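// Worked example of the threshold above: imm12 offsets reach 4095 bytes, so
// the cutoff is ((1 << 12) - 1) / 2 = 2047 bytes. A function whose largest
// call site needs 3000 bytes of outgoing arguments keeps explicit sp
// adjustments around calls, while one needing 512 bytes (and no variable
// sized objects) folds the call frame into the fixed stack frame.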
135 
136 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
137 /// call frame pseudos can be simplified. Unlike most targets, having a FP
138 /// is not sufficient here since we still may reference some objects via SP
139 /// even when FP is available in Thumb2 mode.
140 bool
141 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
142  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
143 }
144 
145 static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
146  const MCPhysReg *CSRegs) {
147  // Integer spill area is handled with "pop".
148  if (isPopOpcode(MI.getOpcode())) {
149  // The first two operands are predicates. The last two are
150  // imp-def and imp-use of SP. Check everything in between.
151  for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
152  if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
153  return false;
154  return true;
155  }
156  if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
157  MI.getOpcode() == ARM::LDR_POST_REG ||
158  MI.getOpcode() == ARM::t2LDR_POST) &&
159  isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
160  MI.getOperand(1).getReg() == ARM::SP)
161  return true;
162 
163  return false;
164 }
165 
166 static void emitRegPlusImmediate(
167  bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
168  const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
169  unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
170  ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
171  if (isARM)
172  emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
173  Pred, PredReg, TII, MIFlags);
174  else
175  emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
176  Pred, PredReg, TII, MIFlags);
177 }
178 
179 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
180  MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
181  const ARMBaseInstrInfo &TII, int NumBytes,
182  unsigned MIFlags = MachineInstr::NoFlags,
183  ARMCC::CondCodes Pred = ARMCC::AL,
184  unsigned PredReg = 0) {
185  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
186  MIFlags, Pred, PredReg);
187 }
188 
189 static int sizeOfSPAdjustment(const MachineInstr &MI) {
190  int RegSize;
191  switch (MI.getOpcode()) {
192  case ARM::VSTMDDB_UPD:
193  RegSize = 8;
194  break;
195  case ARM::STMDB_UPD:
196  case ARM::t2STMDB_UPD:
197  RegSize = 4;
198  break;
199  case ARM::t2STR_PRE:
200  case ARM::STR_PRE_IMM:
201  return 4;
202  default:
203  llvm_unreachable("Unknown push or pop like instruction");
204  }
205 
206  int count = 0;
207  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
208  // pred) so the list starts at 4.
209  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
210  count += RegSize;
211  return count;
212 }
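// For illustration: a prologue "vpush {d8-d11}" (VSTMDDB_UPD) carries four
// D-register operands after the sp/sp/predicate operands, so the loop above
// counts 4 * 8 = 32 bytes of SP adjustment; an equivalent "push {r4-r6, lr}"
// (STMDB_UPD) counts 4 * 4 = 16 bytes.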
213 
214 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
215  size_t StackSizeInBytes) {
216  const MachineFrameInfo &MFI = MF.getFrameInfo();
217  const Function &F = MF.getFunction();
218  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
219  if (F.hasFnAttribute("stack-probe-size"))
220  F.getFnAttribute("stack-probe-size")
221  .getValueAsString()
222  .getAsInteger(0, StackProbeSize);
223  return (StackSizeInBytes >= StackProbeSize) &&
224  !F.hasFnAttribute("no-stack-arg-probe");
225 }
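// Example of this policy (assuming default attributes): with the default
// 4096-byte probe size, a Windows-on-ARM function that allocates 8000 bytes
// of locals gets a __chkstk probe in its prologue, while a 1024-byte frame is
// allocated with a plain sp adjustment. A different threshold can be set per
// function via the "stack-probe-size" attribute.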
226 
227 namespace {
228 
229 struct StackAdjustingInsts {
230  struct InstInfo {
231  MachineBasicBlock::iterator I;
232  unsigned SPAdjust;
233  bool BeforeFPSet;
234  };
235 
236  SmallVector<InstInfo, 4> Insts;
237
238  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
239  bool BeforeFPSet = false) {
240  InstInfo Info = {I, SPAdjust, BeforeFPSet};
241  Insts.push_back(Info);
242  }
243 
244  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
245  auto Info =
246  llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
247  assert(Info != Insts.end() && "invalid sp adjusting instruction");
248  Info->SPAdjust += ExtraBytes;
249  }
250 
251  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
252  const ARMBaseInstrInfo &TII, bool HasFP) {
253  MachineFunction &MF = *MBB.getParent();
254  unsigned CFAOffset = 0;
255  for (auto &Info : Insts) {
256  if (HasFP && !Info.BeforeFPSet)
257  return;
258 
259  CFAOffset -= Info.SPAdjust;
260  unsigned CFIIndex = MF.addFrameInst(
261  MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
262  BuildMI(MBB, std::next(Info.I), dl,
263  TII.get(TargetOpcode::CFI_INSTRUCTION))
264  .addCFIIndex(CFIIndex)
265  .setMIFlags(MachineInstr::FrameSetup);
266  }
267  }
268 };
269 
270 } // end anonymous namespace
271 
272 /// Emit an instruction sequence that will align the address in
273 /// register Reg by zero-ing out the lower bits. For versions of the
274 /// architecture that support Neon, this must be done in a single
275 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
276 /// single instruction. That function only gets called when optimizing
277 /// spilling of D registers on a core with the Neon instruction set
278 /// present.
279 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
280  const TargetInstrInfo &TII,
281  MachineBasicBlock &MBB,
282  MachineBasicBlock::iterator MBBI,
283  const DebugLoc &DL, const unsigned Reg,
284  const unsigned Alignment,
285  const bool MustBeSingleInstruction) {
286  const ARMSubtarget &AST =
287  static_cast<const ARMSubtarget &>(MF.getSubtarget());
288  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
289  const unsigned AlignMask = Alignment - 1;
290  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
291  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
292  if (!AFI->isThumbFunction()) {
293  // if the BFC instruction is available, use that to zero the lower
294  // bits:
295  // bfc Reg, #0, log2(Alignment)
296  // otherwise use BIC, if the mask to zero the required number of bits
297  // can be encoded in the bic immediate field
298  // bic Reg, Reg, Alignment-1
299  // otherwise, emit
300  // lsr Reg, Reg, log2(Alignment)
301  // lsl Reg, Reg, log2(Alignment)
302  if (CanUseBFC) {
303  BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
304  .addReg(Reg, RegState::Kill)
305  .addImm(~AlignMask)
306  .add(predOps(ARMCC::AL));
307  } else if (AlignMask <= 255) {
308  BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
309  .addReg(Reg, RegState::Kill)
310  .addImm(AlignMask)
311  .add(predOps(ARMCC::AL))
312  .add(condCodeOp());
313  } else {
314  assert(!MustBeSingleInstruction &&
315  "Shouldn't call emitAligningInstructions demanding a single "
316  "instruction to be emitted for large stack alignment for a target "
317  "without BFC.");
318  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
319  .addReg(Reg, RegState::Kill)
320  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
321  .add(predOps(ARMCC::AL))
322  .add(condCodeOp());
323  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
324  .addReg(Reg, RegState::Kill)
325  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
326  .add(predOps(ARMCC::AL))
327  .add(condCodeOp());
328  }
329  } else {
330  // Since this is only reached for Thumb-2 targets, the BFC instruction
331  // should always be available.
332  assert(CanUseBFC);
333  BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
334  .addReg(Reg, RegState::Kill)
335  .addImm(~AlignMask)
336  .add(predOps(ARMCC::AL));
337  }
338 }
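// Rough sketch of the sequences emitted above for Reg == r4 and
// Alignment == 16 (AlignMask == 15, log2(Alignment) == 4):
//   BFC available (ARM/Thumb2):  bfc r4, #0, #4
//   BIC fallback (mask fits):    bic r4, r4, #15
//   shift fallback (ARM only):   lsr r4, r4, #4
//                                lsl r4, r4, #4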
339 
340 /// We need the offset of the frame pointer relative to other MachineFrameInfo
341 /// offsets which are encoded relative to SP at function begin.
342 /// See also emitPrologue() for how the FP is set up.
343 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
344 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
345 /// this to produce a conservative estimate that we check in an assert() later.
346 static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
347  // For Thumb1, push.w isn't available, so the first push will always push
348  // r7 and lr onto the stack first.
349  if (AFI.isThumb1OnlyFunction())
350  return -AFI.getArgRegsSaveSize() - (2 * 4);
351  // This is a conservative estimate: assume the frame pointer is r7 and that
352  // the registers from r8 up to pc ("r15") (= 8 registers) are spilled before it.
353  return -AFI.getArgRegsSaveSize() - (8 * 4);
354 }
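// Example of the estimate (assuming no GPR argument registers are saved):
// a Thumb1 function gets -(2 * 4) = -8, i.e. the FP slot is assumed to be at
// most 8 bytes below the incoming SP; ARM/Thumb2 functions get the
// conservative -(8 * 4) = -32. emitPrologue() asserts that the real spill
// offset computed later never falls below this estimate.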
355 
356 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
357  MachineBasicBlock &MBB) const {
358  MachineBasicBlock::iterator MBBI = MBB.begin();
359  MachineFrameInfo &MFI = MF.getFrameInfo();
360  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
361  MachineModuleInfo &MMI = MF.getMMI();
362  MCContext &Context = MMI.getContext();
363  const TargetMachine &TM = MF.getTarget();
364  const MCRegisterInfo *MRI = Context.getRegisterInfo();
365  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
366  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
367  assert(!AFI->isThumb1OnlyFunction() &&
368  "This emitPrologue does not support Thumb1!");
369  bool isARM = !AFI->isThumbFunction();
370  unsigned Align = STI.getFrameLowering()->getStackAlignment();
371  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
372  unsigned NumBytes = MFI.getStackSize();
373  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
374 
375  // Debug location must be unknown since the first debug location is used
376  // to determine the end of the prologue.
377  DebugLoc dl;
378 
379  unsigned FramePtr = RegInfo->getFrameRegister(MF);
380 
381  // Determine the sizes of each callee-save spill areas and record which frame
382  // belongs to which callee-save spill areas.
383  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
384  int FramePtrSpillFI = 0;
385  int D8SpillFI = 0;
386 
387  // All calls are tail calls in GHC calling conv, and functions have no
388  // prologue/epilogue.
389  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
390  return;
391 
392  StackAdjustingInsts DefCFAOffsetCandidates;
393  bool HasFP = hasFP(MF);
394 
395  // Allocate the vararg register save area.
396  if (ArgRegsSaveSize) {
397  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
398  MachineInstr::FrameSetup);
399  DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
400  }
401 
402  if (!AFI->hasStackFrame() &&
403  (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
404  if (NumBytes - ArgRegsSaveSize != 0) {
405  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
406  MachineInstr::FrameSetup);
407  DefCFAOffsetCandidates.addInst(std::prev(MBBI),
408  NumBytes - ArgRegsSaveSize, true);
409  }
410  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
411  return;
412  }
413 
414  // Determine spill area sizes.
415  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
416  unsigned Reg = CSI[i].getReg();
417  int FI = CSI[i].getFrameIdx();
418  switch (Reg) {
419  case ARM::R8:
420  case ARM::R9:
421  case ARM::R10:
422  case ARM::R11:
423  case ARM::R12:
424  if (STI.splitFramePushPop(MF)) {
425  GPRCS2Size += 4;
426  break;
427  }
428  LLVM_FALLTHROUGH;
429  case ARM::R0:
430  case ARM::R1:
431  case ARM::R2:
432  case ARM::R3:
433  case ARM::R4:
434  case ARM::R5:
435  case ARM::R6:
436  case ARM::R7:
437  case ARM::LR:
438  if (Reg == FramePtr)
439  FramePtrSpillFI = FI;
440  GPRCS1Size += 4;
441  break;
442  default:
443  // This is a DPR. Exclude the aligned DPRCS2 spills.
444  if (Reg == ARM::D8)
445  D8SpillFI = FI;
446  if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
447  DPRCSSize += 8;
448  }
449  }
450 
451  // Move past area 1.
452  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
453  if (GPRCS1Size > 0) {
454  GPRCS1Push = LastPush = MBBI++;
455  DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
456  }
457 
458  // Determine starting offsets of spill areas.
459  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
460  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
461  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
462  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
463  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
464  int FramePtrOffsetInPush = 0;
465  if (HasFP) {
466  int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
467  assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset &&
468  "Max FP estimation is wrong");
469  FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
470  AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
471  NumBytes);
472  }
473  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
474  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
475  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
476 
477  // Move past area 2.
478  if (GPRCS2Size > 0) {
479  GPRCS2Push = LastPush = MBBI++;
480  DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
481  }
482 
483  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
484  // .cfi_offset operations will reflect that.
485  if (DPRGapSize) {
486  assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
487  if (LastPush != MBB.end() &&
488  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
489  DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
490  else {
491  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
492  MachineInstr::FrameSetup);
493  DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
494  }
495  }
496 
497  // Move past area 3.
498  if (DPRCSSize > 0) {
499  // Since vpush register list cannot have gaps, there may be multiple vpush
500  // instructions in the prologue.
501  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
502  DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
503  LastPush = MBBI++;
504  }
505  }
506 
507  // Move past the aligned DPRCS2 area.
508  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
509  MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
510  // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
511  // leaves the stack pointer pointing to the DPRCS2 area.
512  //
513  // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
514  NumBytes += MFI.getObjectOffset(D8SpillFI);
515  } else
516  NumBytes = DPRCSOffset;
517 
518  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
519  uint32_t NumWords = NumBytes >> 2;
520 
521  if (NumWords < 65536)
522  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
523  .addImm(NumWords)
524  .setMIFlags(MachineInstr::FrameSetup)
525  .add(predOps(ARMCC::AL));
526  else
527  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
528  .addImm(NumWords)
529  .setMIFlags(MachineInstr::FrameSetup);
530
531  switch (TM.getCodeModel()) {
532  case CodeModel::Tiny:
533  llvm_unreachable("Tiny code model not available on ARM.");
534  case CodeModel::Small:
535  case CodeModel::Medium:
536  case CodeModel::Kernel:
537  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
538  .add(predOps(ARMCC::AL))
539  .addExternalSymbol("__chkstk")
540  .addReg(ARM::R4, RegState::Implicit)
541  .setMIFlags(MachineInstr::FrameSetup);
542  break;
543  case CodeModel::Large:
544  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
545  .addExternalSymbol("__chkstk")
546  .setMIFlags(MachineInstr::FrameSetup);
547
548  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
549  .add(predOps(ARMCC::AL))
550  .addReg(ARM::R12, RegState::Kill)
551  .addReg(ARM::R4, RegState::Implicit)
552  .setMIFlags(MachineInstr::FrameSetup);
553  break;
554  }
555 
556  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
557  .addReg(ARM::SP, RegState::Kill)
558  .addReg(ARM::R4, RegState::Kill)
559  .setMIFlags(MachineInstr::FrameSetup)
560  .add(predOps(ARMCC::AL))
561  .add(condCodeOp());
562  NumBytes = 0;
563  }
564 
565  if (NumBytes) {
566  // Adjust SP after all the callee-save spills.
567  if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
568  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
569  DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
570  else {
571  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
572  MachineInstr::FrameSetup);
573  DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
574  }
575 
576  if (HasFP && isARM)
577  // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
578  // Note it's not safe to do this in Thumb2 mode because it would have
579  // taken two instructions:
580  // mov sp, r7
581  // sub sp, #24
582  // If an interrupt is taken between the two instructions, then sp is in
583  // an inconsistent state (pointing to the middle of callee-saved area).
584  // The interrupt handler can end up clobbering the registers.
585  AFI->setShouldRestoreSPFromFP(true);
586  }
587 
588  // Set FP to point to the stack slot that contains the previous FP.
589  // For iOS, FP is R7, which has now been stored in spill area 1.
590  // Otherwise, if this is not iOS, all the callee-saved registers go
591  // into spill area 1, including the FP in R11. In either case, it
592  // is in area one and the adjustment needs to take place just after
593  // that push.
594  if (HasFP) {
595  MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
596  unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
597  emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
598  dl, TII, FramePtr, ARM::SP,
599  PushSize + FramePtrOffsetInPush,
600  MachineInstr::FrameSetup);
601  if (FramePtrOffsetInPush + PushSize != 0) {
602  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
603  nullptr, MRI->getDwarfRegNum(FramePtr, true),
604  -(ArgRegsSaveSize - FramePtrOffsetInPush)));
605  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
606  .addCFIIndex(CFIIndex)
607  .setMIFlags(MachineInstr::FrameSetup);
608  } else {
609  unsigned CFIIndex =
610  MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
611  nullptr, MRI->getDwarfRegNum(FramePtr, true)));
612  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
613  .addCFIIndex(CFIIndex)
614  .setMIFlags(MachineInstr::FrameSetup);
615  }
616  }
617 
618  // Now that the prologue's actual instructions are finalised, we can insert
619  // the necessary DWARF cf instructions to describe the situation. Start by
620  // recording where each register ended up:
621  if (GPRCS1Size > 0) {
622  MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
623  int CFIIndex;
624  for (const auto &Entry : CSI) {
625  unsigned Reg = Entry.getReg();
626  int FI = Entry.getFrameIdx();
627  switch (Reg) {
628  case ARM::R8:
629  case ARM::R9:
630  case ARM::R10:
631  case ARM::R11:
632  case ARM::R12:
633  if (STI.splitFramePushPop(MF))
634  break;
635  LLVM_FALLTHROUGH;
636  case ARM::R0:
637  case ARM::R1:
638  case ARM::R2:
639  case ARM::R3:
640  case ARM::R4:
641  case ARM::R5:
642  case ARM::R6:
643  case ARM::R7:
644  case ARM::LR:
645  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
646  nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
647  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
648  .addCFIIndex(CFIIndex)
649  .setMIFlags(MachineInstr::FrameSetup);
650  break;
651  }
652  }
653  }
654 
655  if (GPRCS2Size > 0) {
656  MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
657  for (const auto &Entry : CSI) {
658  unsigned Reg = Entry.getReg();
659  int FI = Entry.getFrameIdx();
660  switch (Reg) {
661  case ARM::R8:
662  case ARM::R9:
663  case ARM::R10:
664  case ARM::R11:
665  case ARM::R12:
666  if (STI.splitFramePushPop(MF)) {
667  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
668  unsigned Offset = MFI.getObjectOffset(FI);
669  unsigned CFIIndex = MF.addFrameInst(
670  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
671  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
672  .addCFIIndex(CFIIndex)
673  .setMIFlags(MachineInstr::FrameSetup);
674  }
675  break;
676  }
677  }
678  }
679 
680  if (DPRCSSize > 0) {
681  // Since vpush register list cannot have gaps, there may be multiple vpush
682  // instructions in the prologue.
683  MachineBasicBlock::iterator Pos = std::next(LastPush);
684  for (const auto &Entry : CSI) {
685  unsigned Reg = Entry.getReg();
686  int FI = Entry.getFrameIdx();
687  if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
688  (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
689  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
690  unsigned Offset = MFI.getObjectOffset(FI);
691  unsigned CFIIndex = MF.addFrameInst(
692  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
693  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
694  .addCFIIndex(CFIIndex)
695  .setMIFlags(MachineInstr::FrameSetup);
696  }
697  }
698  }
699 
700  // Now we can emit descriptions of where the canonical frame address was
701  // throughout the process. If we have a frame pointer, it takes over the job
702  // half-way through, so only the first few .cfi_def_cfa_offset instructions
703  // actually get emitted.
704  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
705 
706  if (STI.isTargetELF() && hasFP(MF))
707  MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
708  AFI->getFramePtrSpillOffset());
709 
710  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
711  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
712  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
713  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
714 
715  // If we need dynamic stack realignment, do it here. Be paranoid and make
716  // sure if we also have VLAs, we have a base pointer for frame access.
717  // If aligned NEON registers were spilled, the stack has already been
718  // realigned.
719  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
720  unsigned MaxAlign = MFI.getMaxAlignment();
721  assert(!AFI->isThumb1OnlyFunction());
722  if (!AFI->isThumbFunction()) {
723  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
724  false);
725  } else {
726  // We cannot use sp as source/dest register here, thus we're using r4 to
727  // perform the calculations. We're emitting the following sequence:
728  // mov r4, sp
729  // -- use emitAligningInstructions to produce best sequence to zero
730  // -- out lower bits in r4
731  // mov sp, r4
732  // FIXME: It will be better just to find spare register here.
733  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
734  .addReg(ARM::SP, RegState::Kill)
735  .add(predOps(ARMCC::AL));
736  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
737  false);
738  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
739  .addReg(ARM::R4, RegState::Kill)
740  .add(predOps(ARMCC::AL));
741  }
742 
743  AFI->setShouldRestoreSPFromFP(true);
744  }
745 
746  // If we need a base pointer, set it up here. It's whatever the value
747  // of the stack pointer is at this point. Any variable size objects
748  // will be allocated after this, so we can still use the base pointer
749  // to reference locals.
750  // FIXME: Clarify FrameSetup flags here.
751  if (RegInfo->hasBasePointer(MF)) {
752  if (isARM)
753  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
754  .addReg(ARM::SP)
755  .add(predOps(ARMCC::AL))
756  .add(condCodeOp());
757  else
758  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
759  .addReg(ARM::SP)
760  .add(predOps(ARMCC::AL));
761  }
762 
763  // If the frame has variable sized objects then the epilogue must restore
764  // the sp from fp. We can assume there's an FP here since hasFP already
765  // checks for hasVarSizedObjects.
766  if (MFI.hasVarSizedObjects())
767  AFI->setShouldRestoreSPFromFP(true);
768 }
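// Rough sketch of what emitPrologue produces for a small Thumb2 function with
// an 8-byte local area and a frame pointer in r7 (register choice, offsets
// and CFI details depend on the subtarget and the callee-saved set):
//   push  {r4, r7, lr}          ; GPR spill area 1, contains FP and LR
//   add   r7, sp, #4            ; point FP at the spilled r7
//   sub   sp, sp, #8            ; allocate locals
// together with the .cfi_def_cfa_offset / .cfi_offset directives produced by
// DefCFAOffsetCandidates and the CSI loops above.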
769 
770 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
771  MachineBasicBlock &MBB) const {
772  MachineFrameInfo &MFI = MF.getFrameInfo();
773  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
774  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
775  const ARMBaseInstrInfo &TII =
776  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
777  assert(!AFI->isThumb1OnlyFunction() &&
778  "This emitEpilogue does not support Thumb1!");
779  bool isARM = !AFI->isThumbFunction();
780 
781  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
782  int NumBytes = (int)MFI.getStackSize();
783  unsigned FramePtr = RegInfo->getFrameRegister(MF);
784 
785  // All calls are tail calls in GHC calling conv, and functions have no
786  // prologue/epilogue.
787  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
788  return;
789 
790  // First put ourselves on the first (from top) terminator instructions.
791  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
792  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
793 
794  if (!AFI->hasStackFrame()) {
795  if (NumBytes - ArgRegsSaveSize != 0)
796  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
797  } else {
798  // Unwind MBBI to point to first LDR / VLDRD.
799  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
800  if (MBBI != MBB.begin()) {
801  do {
802  --MBBI;
803  } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
804  if (!isCSRestore(*MBBI, TII, CSRegs))
805  ++MBBI;
806  }
807 
808  // Move SP to start of FP callee save spill area.
809  NumBytes -= (ArgRegsSaveSize +
810  AFI->getGPRCalleeSavedArea1Size() +
811  AFI->getGPRCalleeSavedArea2Size() +
812  AFI->getDPRCalleeSavedGapSize() +
813  AFI->getDPRCalleeSavedAreaSize());
814
815  // Reset SP based on frame pointer only if the stack frame extends beyond
816  // frame pointer stack slot or target is ELF and the function has FP.
817  if (AFI->shouldRestoreSPFromFP()) {
818  NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
819  if (NumBytes) {
820  if (isARM)
821  emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
822  ARMCC::AL, 0, TII);
823  else {
824  // It's not possible to restore SP from FP in a single instruction.
825  // For iOS, this looks like:
826  // mov sp, r7
827  // sub sp, #24
828  // This is bad, if an interrupt is taken after the mov, sp is in an
829  // inconsistent state.
830  // Use the first callee-saved register as a scratch register.
831  assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
832  "No scratch register to restore SP from FP!");
833  emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
834  ARMCC::AL, 0, TII);
835  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
836  .addReg(ARM::R4)
837  .add(predOps(ARMCC::AL));
838  }
839  } else {
840  // Thumb2 or ARM.
841  if (isARM)
842  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
843  .addReg(FramePtr)
844  .add(predOps(ARMCC::AL))
845  .add(condCodeOp());
846  else
847  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
848  .addReg(FramePtr)
849  .add(predOps(ARMCC::AL));
850  }
851  } else if (NumBytes &&
852  !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
853  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
854 
855  // Increment past our save areas.
856  if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
857  MBBI++;
858  // Since vpop register list cannot have gaps, there may be multiple vpop
859  // instructions in the epilogue.
860  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
861  MBBI++;
862  }
863  if (AFI->getDPRCalleeSavedGapSize()) {
864  assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
865  "unexpected DPR alignment gap");
866  emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
867  }
868 
869  if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
870  if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
871  }
872 
873  if (ArgRegsSaveSize)
874  emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
875 }
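// Matching epilogue sketch for the prologue example above (Thumb2, FP in r7,
// 8-byte local area, no aligned DPR spills):
//   add   sp, sp, #8            ; or folded into the pop by
//                               ; tryFoldSPUpdateIntoPushPop()
//   pop   {r4, r7, pc}          ; restore CSRs and return in one instruction
// When the frame has variable sized objects, sp is first recomputed from r7
// in the shouldRestoreSPFromFP() branch before the pops run.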
876 
877 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
878 /// debug info. It's the same as what we use for resolving the code-gen
879 /// references for now. FIXME: This can go wrong when references are
880 /// SP-relative and simple call frames aren't used.
881 int
882 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
883  unsigned &FrameReg) const {
884  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
885 }
886 
887 int
888 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
889  int FI, unsigned &FrameReg,
890  int SPAdj) const {
891  const MachineFrameInfo &MFI = MF.getFrameInfo();
892  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
893  MF.getSubtarget().getRegisterInfo());
894  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
895  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
896  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
897  bool isFixed = MFI.isFixedObjectIndex(FI);
898 
899  FrameReg = ARM::SP;
900  Offset += SPAdj;
901 
902  // SP can move around if there are allocas. We may also lose track of SP
903  // when emergency spilling inside a non-reserved call frame setup.
904  bool hasMovingSP = !hasReservedCallFrame(MF);
905 
906  // When dynamically realigning the stack, use the frame pointer for
907  // parameters, and the stack/base pointer for locals.
908  if (RegInfo->needsStackRealignment(MF)) {
909  assert(hasFP(MF) && "dynamic stack realignment without a FP!");
910  if (isFixed) {
911  FrameReg = RegInfo->getFrameRegister(MF);
912  Offset = FPOffset;
913  } else if (hasMovingSP) {
914  assert(RegInfo->hasBasePointer(MF) &&
915  "VLAs and dynamic stack alignment, but missing base pointer!");
916  FrameReg = RegInfo->getBaseRegister();
917  Offset -= SPAdj;
918  }
919  return Offset;
920  }
921 
922  // If there is a frame pointer, use it when we can.
923  if (hasFP(MF) && AFI->hasStackFrame()) {
924  // Use frame pointer to reference fixed objects. Use it for locals if
925  // there are VLAs (and thus the SP isn't reliable as a base).
926  if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
927  FrameReg = RegInfo->getFrameRegister(MF);
928  return FPOffset;
929  } else if (hasMovingSP) {
930  assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
931  if (AFI->isThumb2Function()) {
932  // Try to use the frame pointer if we can, else use the base pointer
933  // since it's available. This is handy for the emergency spill slot, in
934  // particular.
935  if (FPOffset >= -255 && FPOffset < 0) {
936  FrameReg = RegInfo->getFrameRegister(MF);
937  return FPOffset;
938  }
939  }
940  } else if (AFI->isThumbFunction()) {
941  // Prefer SP to base pointer, if the offset is suitably aligned and in
942  // range as the effective range of the immediate offset is bigger when
943  // basing off SP.
944  // Use add <rd>, sp, #<imm8>
945  // ldr <rd>, [sp, #<imm8>]
946  if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
947  return Offset;
948  // In Thumb2 mode, the negative offset is very limited. Try to avoid
949  // out of range references. ldr <rt>,[<rn>, #-<imm8>]
950  if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
951  FrameReg = RegInfo->getFrameRegister(MF);
952  return FPOffset;
953  }
954  } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
955  // Otherwise, use SP or FP, whichever is closer to the stack slot.
956  FrameReg = RegInfo->getFrameRegister(MF);
957  return FPOffset;
958  }
959  }
960  // Use the base pointer if we have one.
961  // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
962  // That can happen if we forced a base pointer for a large call frame.
963  if (RegInfo->hasBasePointer(MF)) {
964  FrameReg = RegInfo->getBaseRegister();
965  Offset -= SPAdj;
966  }
967  return Offset;
968 }
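// Worked example with made-up numbers: for a stack size of 32 and an object
// at MFI offset -24, Offset = -24 + 32 = 8 above the final SP. If the frame
// pointer was spilled at getFramePtrSpillOffset() == 24, the same slot is at
// FPOffset = 8 - 24 = -16 relative to FP, and the logic above returns
// whichever of SP/FP/base pointer gives an encodable, closer offset.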
969 
970 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
971  MachineBasicBlock::iterator MI,
972  const std::vector<CalleeSavedInfo> &CSI,
973  unsigned StmOpc, unsigned StrOpc,
974  bool NoGap,
975  bool(*Func)(unsigned, bool),
976  unsigned NumAlignedDPRCS2Regs,
977  unsigned MIFlags) const {
978  MachineFunction &MF = *MBB.getParent();
979  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
980  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
981
982  DebugLoc DL;
983 
984  using RegAndKill = std::pair<unsigned, bool>;
985 
986  SmallVector<RegAndKill, 4> Regs;
987  unsigned i = CSI.size();
988  while (i != 0) {
989  unsigned LastReg = 0;
990  for (; i != 0; --i) {
991  unsigned Reg = CSI[i-1].getReg();
992  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
993 
994  // D-registers in the aligned area DPRCS2 are NOT spilled here.
995  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
996  continue;
997 
998  const MachineRegisterInfo &MRI = MF.getRegInfo();
999  bool isLiveIn = MRI.isLiveIn(Reg);
1000  if (!isLiveIn && !MRI.isReserved(Reg))
1001  MBB.addLiveIn(Reg);
1002  // If NoGap is true, push consecutive registers and then leave the rest
1003  // for other instructions. e.g.
1004  // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1005  if (NoGap && LastReg && LastReg != Reg-1)
1006  break;
1007  LastReg = Reg;
1008  // Do not set a kill flag on values that are also marked as live-in. This
1009  // happens with the @llvm.returnaddress intrinsic and with arguments
1010  // passed in callee saved registers.
1011  // Omitting the kill flags is conservatively correct even if the live-in
1012  // is not used after all.
1013  Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1014  }
1015 
1016  if (Regs.empty())
1017  continue;
1018 
1019  llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1020  return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1021  });
1022 
1023  if (Regs.size() > 1 || StrOpc== 0) {
1024  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1025  .addReg(ARM::SP)
1026  .setMIFlags(MIFlags)
1027  .add(predOps(ARMCC::AL));
1028  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1029  MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1030  } else if (Regs.size() == 1) {
1031  BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1032  .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1033  .addReg(ARM::SP)
1034  .setMIFlags(MIFlags)
1035  .addImm(-4)
1036  .add(predOps(ARMCC::AL));
1037  }
1038  Regs.clear();
1039 
1040  // Put any subsequent vpush instructions before this one: they will refer to
1041  // higher register numbers so need to be pushed first in order to preserve
1042  // monotonicity.
1043  if (MI != MBB.begin())
1044  --MI;
1045  }
1046 }
1047 
1048 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1049  MachineBasicBlock::iterator MI,
1050  std::vector<CalleeSavedInfo> &CSI,
1051  unsigned LdmOpc, unsigned LdrOpc,
1052  bool isVarArg, bool NoGap,
1053  bool(*Func)(unsigned, bool),
1054  unsigned NumAlignedDPRCS2Regs) const {
1055  MachineFunction &MF = *MBB.getParent();
1056  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1057  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
1058  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1059  DebugLoc DL;
1060  bool isTailCall = false;
1061  bool isInterrupt = false;
1062  bool isTrap = false;
1063  if (MBB.end() != MI) {
1064  DL = MI->getDebugLoc();
1065  unsigned RetOpcode = MI->getOpcode();
1066  isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1067  isInterrupt =
1068  RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1069  isTrap =
1070  RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1071  RetOpcode == ARM::tTRAP;
1072  }
1073 
1074  SmallVector<unsigned, 4> Regs;
1075  unsigned i = CSI.size();
1076  while (i != 0) {
1077  unsigned LastReg = 0;
1078  bool DeleteRet = false;
1079  for (; i != 0; --i) {
1080  CalleeSavedInfo &Info = CSI[i-1];
1081  unsigned Reg = Info.getReg();
1082  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1083 
1084  // The aligned reloads from area DPRCS2 are not inserted here.
1085  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1086  continue;
1087 
1088  if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1089  !isTrap && STI.hasV5TOps()) {
1090  if (MBB.succ_empty()) {
1091  Reg = ARM::PC;
1092  // Fold the return instruction into the LDM.
1093  DeleteRet = true;
1094  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1095  // We 'restore' LR into PC so it is not live out of the return block:
1096  // Clear Restored bit.
1097  Info.setRestored(false);
1098  } else
1099  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1100  }
1101 
1102  // If NoGap is true, pop consecutive registers and then leave the rest
1103  // for other instructions. e.g.
1104  // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1105  if (NoGap && LastReg && LastReg != Reg-1)
1106  break;
1107 
1108  LastReg = Reg;
1109  Regs.push_back(Reg);
1110  }
1111 
1112  if (Regs.empty())
1113  continue;
1114 
1115  llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1116  return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1117  });
1118 
1119  if (Regs.size() > 1 || LdrOpc == 0) {
1120  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1121  .addReg(ARM::SP)
1122  .add(predOps(ARMCC::AL));
1123  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1124  MIB.addReg(Regs[i], getDefRegState(true));
1125  if (DeleteRet) {
1126  if (MI != MBB.end()) {
1127  MIB.copyImplicitOps(*MI);
1128  MI->eraseFromParent();
1129  }
1130  }
1131  MI = MIB;
1132  } else if (Regs.size() == 1) {
1133  // If we adjusted the reg to PC from LR above, switch it back here. We
1134  // only do that for LDM.
1135  if (Regs[0] == ARM::PC)
1136  Regs[0] = ARM::LR;
1137  MachineInstrBuilder MIB =
1138  BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1139  .addReg(ARM::SP, RegState::Define)
1140  .addReg(ARM::SP);
1141  // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1142  // that refactoring is complete (eventually).
1143  if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1144  MIB.addReg(0);
1145  MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1146  } else
1147  MIB.addImm(4);
1148  MIB.add(predOps(ARMCC::AL));
1149  }
1150  Regs.clear();
1151 
1152  // Put any subsequent vpop instructions after this one: they will refer to
1153  // higher register numbers so need to be popped afterwards.
1154  if (MI != MBB.end())
1155  ++MI;
1156  }
1157 }
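// Example of the LR-to-PC folding above: a return block that would otherwise
// end in
//   pop {r4, r5, lr}
//   bx  lr
// is emitted as the single instruction
//   pop {r4, r5, pc}
// (LDMIA_RET / t2LDMIA_RET) and the original return is erased via DeleteRet.
// This only happens when the block has no successors, the function is not
// vararg, and the subtarget has v5T ops so loading into pc interworks.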
1158 
1159 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1160 /// starting from d8. Also insert stack realignment code and leave the stack
1161 /// pointer pointing to the d8 spill slot.
1162 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1163  MachineBasicBlock::iterator MI,
1164  unsigned NumAlignedDPRCS2Regs,
1165  const std::vector<CalleeSavedInfo> &CSI,
1166  const TargetRegisterInfo *TRI) {
1167  MachineFunction &MF = *MBB.getParent();
1168  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1169  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1170  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1171  MachineFrameInfo &MFI = MF.getFrameInfo();
1172 
1173  // Mark the D-register spill slots as properly aligned. Since MFI computes
1174  // stack slot layout backwards, this can actually mean that the d-reg stack
1175  // slot offsets can be wrong. The offset for d8 will always be correct.
1176  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1177  unsigned DNum = CSI[i].getReg() - ARM::D8;
1178  if (DNum > NumAlignedDPRCS2Regs - 1)
1179  continue;
1180  int FI = CSI[i].getFrameIdx();
1181  // The even-numbered registers will be 16-byte aligned, the odd-numbered
1182  // registers will be 8-byte aligned.
1183  MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
1184 
1185  // The stack slot for D8 needs to be maximally aligned because this is
1186  // actually the point where we align the stack pointer. MachineFrameInfo
1187  // computes all offsets relative to the incoming stack pointer which is a
1188  // bit weird when realigning the stack. Any extra padding for this
1189  // over-alignment is not realized because the code inserted below adjusts
1190  // the stack pointer by numregs * 8 before aligning the stack pointer.
1191  if (DNum == 0)
1192  MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
1193  }
1194 
1195  // Move the stack pointer to the d8 spill slot, and align it at the same
1196  // time. Leave the stack slot address in the scratch register r4.
1197  //
1198  // sub r4, sp, #numregs * 8
1199  // bic r4, r4, #align - 1
1200  // mov sp, r4
1201  //
1202  bool isThumb = AFI->isThumbFunction();
1203  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1204  AFI->setShouldRestoreSPFromFP(true);
1205 
1206  // sub r4, sp, #numregs * 8
1207  // The immediate is <= 64, so it doesn't need any special encoding.
1208  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1209  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1210  .addReg(ARM::SP)
1211  .addImm(8 * NumAlignedDPRCS2Regs)
1212  .add(predOps(ARMCC::AL))
1213  .add(condCodeOp());
1214 
1215  unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
1216  // We must set parameter MustBeSingleInstruction to true, since
1217  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1218  // stack alignment. Luckily, this can always be done since all ARM
1219  // architecture versions that support Neon also support the BFC
1220  // instruction.
1221  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1222 
1223  // mov sp, r4
1224  // The stack pointer must be adjusted before spilling anything, otherwise
1225  // the stack slots could be clobbered by an interrupt handler.
1226  // Leave r4 live, it is used below.
1227  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1228  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1229  .addReg(ARM::R4)
1230  .add(predOps(ARMCC::AL));
1231  if (!isThumb)
1232  MIB.add(condCodeOp());
1233 
1234  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1235  // r4 holds the stack slot address.
1236  unsigned NextReg = ARM::D8;
1237 
1238  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1239  // The writeback is only needed when emitting two vst1.64 instructions.
1240  if (NumAlignedDPRCS2Regs >= 6) {
1241  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1242  &ARM::QQPRRegClass);
1243  MBB.addLiveIn(SupReg);
1244  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1245  .addReg(ARM::R4, RegState::Kill)
1246  .addImm(16)
1247  .addReg(NextReg)
1248  .addReg(SupReg, RegState::ImplicitKill)
1249  .add(predOps(ARMCC::AL));
1250  NextReg += 4;
1251  NumAlignedDPRCS2Regs -= 4;
1252  }
1253 
1254  // We won't modify r4 beyond this point. It currently points to the next
1255  // register to be spilled.
1256  unsigned R4BaseReg = NextReg;
1257 
1258  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1259  if (NumAlignedDPRCS2Regs >= 4) {
1260  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1261  &ARM::QQPRRegClass);
1262  MBB.addLiveIn(SupReg);
1263  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1264  .addReg(ARM::R4)
1265  .addImm(16)
1266  .addReg(NextReg)
1267  .addReg(SupReg, RegState::ImplicitKill)
1268  .add(predOps(ARMCC::AL));
1269  NextReg += 4;
1270  NumAlignedDPRCS2Regs -= 4;
1271  }
1272 
1273  // 16-byte aligned vst1.64 with 2 d-regs.
1274  if (NumAlignedDPRCS2Regs >= 2) {
1275  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1276  &ARM::QPRRegClass);
1277  MBB.addLiveIn(SupReg);
1278  BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1279  .addReg(ARM::R4)
1280  .addImm(16)
1281  .addReg(SupReg)
1282  .add(predOps(ARMCC::AL));
1283  NextReg += 2;
1284  NumAlignedDPRCS2Regs -= 2;
1285  }
1286 
1287  // Finally, use a vanilla vstr.64 for the odd last register.
1288  if (NumAlignedDPRCS2Regs) {
1289  MBB.addLiveIn(NextReg);
1290  // vstr.64 uses addrmode5 which has an offset scale of 4.
1291  BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1292  .addReg(NextReg)
1293  .addReg(ARM::R4)
1294  .addImm((NextReg - R4BaseReg) * 2)
1295  .add(predOps(ARMCC::AL));
1296  }
1297 
1298  // The last spill instruction inserted should kill the scratch register r4.
1299  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1300 }
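// Illustrative spill sequence for NumAlignedDPRCS2Regs == 6 and a 16-byte
// aligned stack (alignment hints omitted; they come from the code above):
//   sub     r4, sp, #48
//   bfc     r4, #0, #4
//   mov     sp, r4
//   vst1.64 {d8, d9, d10, d11}, [r4]!   ; 16-byte aligned, writeback
//   vst1.64 {d12, d13}, [r4]            ; 16-byte aligned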
1301 
1302 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1303 /// iterator to the following instruction.
1304 static MachineBasicBlock::iterator
1305 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1306  unsigned NumAlignedDPRCS2Regs) {
1307  // sub r4, sp, #numregs * 8
1308  // bic r4, r4, #align - 1
1309  // mov sp, r4
1310  ++MI; ++MI; ++MI;
1311  assert(MI->mayStore() && "Expecting spill instruction");
1312 
1313  // These switches all fall through.
1314  switch(NumAlignedDPRCS2Regs) {
1315  case 7:
1316  ++MI;
1317  assert(MI->mayStore() && "Expecting spill instruction");
1318  LLVM_FALLTHROUGH;
1319  default:
1320  ++MI;
1321  assert(MI->mayStore() && "Expecting spill instruction");
1322  LLVM_FALLTHROUGH;
1323  case 1:
1324  case 2:
1325  case 4:
1326  assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1327  ++MI;
1328  }
1329  return MI;
1330 }
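// For example, with NumAlignedDPRCS2Regs == 6 the spill code consists of the
// three realignment instructions plus two vst1.64 stores, so the iterator is
// advanced past five instructions (via the "default" arm of the switch); with
// a single d-register it advances past four: the three realignment
// instructions and one vstr.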
1331 
1332 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1333 /// starting from d8. These instructions are assumed to execute while the
1334 /// stack is still aligned, unlike the code inserted by emitPopInst.
1335 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1336  MachineBasicBlock::iterator MI,
1337  unsigned NumAlignedDPRCS2Regs,
1338  const std::vector<CalleeSavedInfo> &CSI,
1339  const TargetRegisterInfo *TRI) {
1340  MachineFunction &MF = *MBB.getParent();
1341  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1342  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1343  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1344 
1345  // Find the frame index assigned to d8.
1346  int D8SpillFI = 0;
1347  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1348  if (CSI[i].getReg() == ARM::D8) {
1349  D8SpillFI = CSI[i].getFrameIdx();
1350  break;
1351  }
1352 
1353  // Materialize the address of the d8 spill slot into the scratch register r4.
1354  // This can be fairly complicated if the stack frame is large, so just use
1355  // the normal frame index elimination mechanism to do it. This code runs as
1356  // the initial part of the epilog where the stack and base pointers haven't
1357  // been changed yet.
1358  bool isThumb = AFI->isThumbFunction();
1359  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1360 
1361  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1362  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1363  .addFrameIndex(D8SpillFI)
1364  .addImm(0)
1365  .add(predOps(ARMCC::AL))
1366  .add(condCodeOp());
1367 
1368  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1369  unsigned NextReg = ARM::D8;
1370 
1371  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1372  if (NumAlignedDPRCS2Regs >= 6) {
1373  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1374  &ARM::QQPRRegClass);
1375  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1376  .addReg(ARM::R4, RegState::Define)
1377  .addReg(ARM::R4, RegState::Kill)
1378  .addImm(16)
1379  .addReg(SupReg, RegState::ImplicitDefine)
1380  .add(predOps(ARMCC::AL));
1381  NextReg += 4;
1382  NumAlignedDPRCS2Regs -= 4;
1383  }
1384 
1385  // We won't modify r4 beyond this point. It currently points to the next
1386  // register to be spilled.
1387  unsigned R4BaseReg = NextReg;
1388 
1389  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1390  if (NumAlignedDPRCS2Regs >= 4) {
1391  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1392  &ARM::QQPRRegClass);
1393  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1394  .addReg(ARM::R4)
1395  .addImm(16)
1396  .addReg(SupReg, RegState::ImplicitDefine)
1397  .add(predOps(ARMCC::AL));
1398  NextReg += 4;
1399  NumAlignedDPRCS2Regs -= 4;
1400  }
1401 
1402  // 16-byte aligned vld1.64 with 2 d-regs.
1403  if (NumAlignedDPRCS2Regs >= 2) {
1404  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1405  &ARM::QPRRegClass);
1406  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1407  .addReg(ARM::R4)
1408  .addImm(16)
1409  .add(predOps(ARMCC::AL));
1410  NextReg += 2;
1411  NumAlignedDPRCS2Regs -= 2;
1412  }
1413 
1414  // Finally, use a vanilla vldr.64 for the remaining odd register.
1415  if (NumAlignedDPRCS2Regs)
1416  BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1417  .addReg(ARM::R4)
1418  .addImm(2 * (NextReg - R4BaseReg))
1419  .add(predOps(ARMCC::AL));
1420 
1421  // Last store kills r4.
1422  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1423 }
1424 
1425 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
1426  MachineBasicBlock::iterator MI,
1427  const std::vector<CalleeSavedInfo> &CSI,
1428  const TargetRegisterInfo *TRI) const {
1429  if (CSI.empty())
1430  return false;
1431 
1432  MachineFunction &MF = *MBB.getParent();
1433  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1434
1435  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1436  unsigned PushOneOpc = AFI->isThumbFunction() ?
1437  ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1438  unsigned FltOpc = ARM::VSTMDDB_UPD;
1439  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1440  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1441  MachineInstr::FrameSetup);
1442  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1443  MachineInstr::FrameSetup);
1444  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1445  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1446 
1447  // The code above does not insert spill code for the aligned DPRCS2 registers.
1448  // The stack realignment code will be inserted between the push instructions
1449  // and these spills.
1450  if (NumAlignedDPRCS2Regs)
1451  emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1452 
1453  return true;
1454 }
1455 
1456 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1457  MachineBasicBlock::iterator MI,
1458  std::vector<CalleeSavedInfo> &CSI,
1459  const TargetRegisterInfo *TRI) const {
1460  if (CSI.empty())
1461  return false;
1462 
1463  MachineFunction &MF = *MBB.getParent();
1464  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1465  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1466  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1467 
1468  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1469  // registers. Do that here instead.
1470  if (NumAlignedDPRCS2Regs)
1471  emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1472 
1473  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1474  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1475  unsigned FltOpc = ARM::VLDMDIA_UPD;
1476  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1477  NumAlignedDPRCS2Regs);
1478  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1479  &isARMArea2Register, 0);
1480  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1481  &isARMArea1Register, 0);
1482 
1483  return true;
1484 }
1485 
1486 // FIXME: Make generic?
1487 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
1488  const ARMBaseInstrInfo &TII) {
1489  unsigned FnSize = 0;
1490  for (auto &MBB : MF) {
1491  for (auto &MI : MBB)
1492  FnSize += TII.getInstSizeInBytes(MI);
1493  }
1494  if (MF.getJumpTableInfo())
1495  for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
1496  FnSize += Table.MBBs.size() * 4;
1497  FnSize += MF.getConstantPool()->getConstants().size() * 4;
1498  return FnSize;
1499 }
1500 
1501 /// estimateRSStackSizeLimit - Look at each instruction that references stack
1502 /// frames and return the stack size limit beyond which some of these
1503 /// instructions will require a scratch register during their expansion later.
1504 // FIXME: Move to TII?
1505 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
1506  const TargetFrameLowering *TFI) {
1507  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1508  unsigned Limit = (1 << 12) - 1;
1509  for (auto &MBB : MF) {
1510  for (auto &MI : MBB) {
1511  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1512  if (!MI.getOperand(i).isFI())
1513  continue;
1514 
1515  // When using ADDri to get the address of a stack object, 255 is the
1516  // largest offset guaranteed to fit in the immediate offset.
1517  if (MI.getOpcode() == ARM::ADDri) {
1518  Limit = std::min(Limit, (1U << 8) - 1);
1519  break;
1520  }
1521 
1522  // Otherwise check the addressing mode.
1523  switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
1524  case ARMII::AddrMode3:
1525  case ARMII::AddrModeT2_i8:
1526  Limit = std::min(Limit, (1U << 8) - 1);
1527  break;
1528  case ARMII::AddrMode5:
1529  case ARMII::AddrModeT2_i8s4:
1530  case ARMII::AddrModeT2_ldrex:
1531  Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1532  break;
1533  case ARMII::AddrModeT2_i12:
1534  // i12 supports only positive offset so these will be converted to
1535  // i8 opcodes. See llvm::rewriteT2FrameIndex.
1536  if (TFI->hasFP(MF) && AFI->hasStackFrame())
1537  Limit = std::min(Limit, (1U << 8) - 1);
1538  break;
1539  case ARMII::AddrMode4:
1540  case ARMII::AddrMode6:
1541  // Addressing modes 4 & 6 (load/store) instructions can't encode an
1542  // immediate offset for stack references.
1543  return 0;
1544  default:
1545  break;
1546  }
1547  break; // At most one FI per instruction
1548  }
1549  }
1550  }
1551 
1552  return Limit;
1553 }
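// Example of the limit computation (a sketch): a Thumb2 function whose only
// frame references use the i12 form keeps the full 4095-byte limit unless it
// has a frame pointer and a stack frame, in which case the possible rewrite
// to the i8 form caps the limit at 255; a single AddrMode4/AddrMode6 access
// (e.g. a NEON load) forces the limit to 0 because those modes cannot encode
// an immediate offset at all.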
1554 
1555 // In functions that realign the stack, it can be an advantage to spill the
1556 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1557 // instructions take alignment hints that can improve performance.
1558 static void
1559 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
1560  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1561  if (!SpillAlignedNEONRegs)
1562  return;
1563 
1564  // Naked functions don't spill callee-saved registers.
1565  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
1566  return;
1567 
1568  // We are planning to use NEON instructions vst1 / vld1.
1569  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
1570  return;
1571 
1572  // Don't bother if the default stack alignment is sufficiently high.
1573  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
1574  return;
1575 
1576  // Aligned spills require stack realignment.
1577  if (!static_cast<const ARMBaseRegisterInfo *>(
1578  MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
1579  return;
1580 
1581  // We always spill contiguous d-registers starting from d8. Count how many
1582  // needs spilling. The register allocator will almost always use the
1583  // callee-saved registers in order, but it can happen that there are holes in
1584  // the range. Registers above the hole will be spilled to the standard DPRCS
1585  // area.
1586  unsigned NumSpills = 0;
1587  for (; NumSpills < 8; ++NumSpills)
1588  if (!SavedRegs.test(ARM::D8 + NumSpills))
1589  break;
1590 
1591  // Don't do this for just one d-register. It's not worth it.
1592  if (NumSpills < 2)
1593  return;
1594 
1595  // Spill the first NumSpills D-registers after realigning the stack.
1596  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1597 
1598  // A scratch register is required for the vst1 / vld1 instructions.
1599  SavedRegs.set(ARM::R4);
1600 }
1601 
1602 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
1603  BitVector &SavedRegs,
1604  RegScavenger *RS) const {
1605  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1606  // This tells PEI to spill the FP as if it is any other callee-save register
1607  // to take advantage of the eliminateFrameIndex machinery. This also ensures it
1608  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1609  // to combine multiple loads / stores.
1610  bool CanEliminateFrame = true;
1611  bool CS1Spilled = false;
1612  bool LRSpilled = false;
1613  unsigned NumGPRSpills = 0;
1614  unsigned NumFPRSpills = 0;
1615  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1616  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1617  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1618  MF.getSubtarget().getRegisterInfo());
1619  const ARMBaseInstrInfo &TII =
1620  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1621  MachineRegisterInfo &MRI = MF.getRegInfo();
1622  MachineFrameInfo &MFI = MF.getFrameInfo();
1623  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1624  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1625  (void)TRI; // Silence unused warning in non-assert builds.
1626  unsigned FramePtr = RegInfo->getFrameRegister(MF);
1627 
1628  // Spill R4 if a Thumb2 function requires stack realignment - it will be used as
1629  // a scratch register. Also spill R4 if the Thumb2 function has variable-sized objects,
1630  // since it's not always possible to restore sp from fp in a single
1631  // instruction.
1632  // FIXME: It would be better just to find a spare register here.
1633  if (AFI->isThumb2Function() &&
1634  (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1635  SavedRegs.set(ARM::R4);
1636 
1637  // If a stack probe will be emitted, spill R4 and LR, since they are
1638  // clobbered by the stack probe call.
1639  // This estimate should be a safe, conservative estimate. The actual
1640  // stack probe is enabled based on the size of the local objects;
1641  // this estimate also includes the varargs store size.
1642  if (STI.isTargetWindows() &&
1643  WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
1644  SavedRegs.set(ARM::R4);
1645  SavedRegs.set(ARM::LR);
1646  }
1647 
1648  if (AFI->isThumb1OnlyFunction()) {
1649  // Spill LR if Thumb1 function uses variable length argument lists.
1650  if (AFI->getArgRegsSaveSize() > 0)
1651  SavedRegs.set(ARM::LR);
1652 
1653  // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
1654  // requires stack alignment. We don't know for sure what the stack size
1655  // will be, but for this, an estimate is good enough. If anything
1656  // changes it, it'll be a spill, which implies we've used all the registers
1657  // and so R4 is already used, so not marking it here will be OK.
1658  // FIXME: It would be better just to find a spare register here.
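  // (The 508-byte threshold below corresponds to the largest immediate a
  // Thumb1 sp-adjusting add/sub can encode: (2^7 - 1) * 4 = 508 bytes.)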
1659  if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
1660  MFI.estimateStackSize(MF) > 508)
1661  SavedRegs.set(ARM::R4);
1662  }
1663 
1664  // See if we can spill vector registers to aligned stack.
1665  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
1666 
1667  // Spill the BasePtr if it's used.
1668  if (RegInfo->hasBasePointer(MF))
1669  SavedRegs.set(RegInfo->getBaseRegister());
1670 
1671  // Don't spill FP if the frame can be eliminated. This is determined
1672  // by scanning the callee-save registers to see if any is modified.
1673  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1674  for (unsigned i = 0; CSRegs[i]; ++i) {
1675  unsigned Reg = CSRegs[i];
1676  bool Spilled = false;
1677  if (SavedRegs.test(Reg)) {
1678  Spilled = true;
1679  CanEliminateFrame = false;
1680  }
1681 
1682  if (!ARM::GPRRegClass.contains(Reg)) {
1683  if (Spilled) {
1684  if (ARM::SPRRegClass.contains(Reg))
1685  NumFPRSpills++;
1686  else if (ARM::DPRRegClass.contains(Reg))
1687  NumFPRSpills += 2;
1688  else if (ARM::QPRRegClass.contains(Reg))
1689  NumFPRSpills += 4;
1690  }
1691  continue;
1692  }
1693 
1694  if (Spilled) {
1695  NumGPRSpills++;
1696 
1697  if (!STI.splitFramePushPop(MF)) {
1698  if (Reg == ARM::LR)
1699  LRSpilled = true;
1700  CS1Spilled = true;
1701  continue;
1702  }
1703 
1704  // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
1705  switch (Reg) {
1706  case ARM::LR:
1707  LRSpilled = true;
1708  LLVM_FALLTHROUGH;
1709  case ARM::R0: case ARM::R1:
1710  case ARM::R2: case ARM::R3:
1711  case ARM::R4: case ARM::R5:
1712  case ARM::R6: case ARM::R7:
1713  CS1Spilled = true;
1714  break;
1715  default:
1716  break;
1717  }
1718  } else {
1719  if (!STI.splitFramePushPop(MF)) {
1720  UnspilledCS1GPRs.push_back(Reg);
1721  continue;
1722  }
1723 
1724  switch (Reg) {
1725  case ARM::R0: case ARM::R1:
1726  case ARM::R2: case ARM::R3:
1727  case ARM::R4: case ARM::R5:
1728  case ARM::R6: case ARM::R7:
1729  case ARM::LR:
1730  UnspilledCS1GPRs.push_back(Reg);
1731  break;
1732  default:
1733  UnspilledCS2GPRs.push_back(Reg);
1734  break;
1735  }
1736  }
1737  }
1738 
1739  bool ForceLRSpill = false;
1740  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1741  unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
1742  // Force LR to be spilled if the Thumb function size is > 2048. This enables
1743  // use of BL to implement a far jump. If it turns out that it's not needed,
1744  // then the branch fix-up path will undo it.
1745  if (FnSize >= (1 << 11)) {
1746  CanEliminateFrame = false;
1747  ForceLRSpill = true;
1748  }
1749  }
1750 
1751  // If any of the stack slot references may be out of range of an immediate
1752  // offset, make sure a register (or a spill slot) is available for the
1753  // register scavenger. Note that if we're indexing off the frame pointer, the
1754  // effective stack size is 4 bytes larger since the FP points to the stack
1755  // slot of the previous FP. Also, if we have variable sized objects in the
1756  // function, stack slot references will often be negative, and some of
1757  // our instructions are positive-offset only, so conservatively consider
1758  // that case to want a spill slot (or register) as well. Similarly, if
1759  // the function adjusts the stack pointer during execution and the
1760  // adjustments aren't already part of our stack size estimate, our offset
1761  // calculations may be off, so be conservative.
1762  // FIXME: We could add logic to be more precise about negative offsets
1763  // and which instructions will need a scratch register for them. Is it
1764  // worth the effort and added fragility?
1765  unsigned EstimatedStackSize =
1766  MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
1767 
1768  // Determine biggest (positive) SP offset in MachineFrameInfo.
1769  int MaxFixedOffset = 0;
1770  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
1771  int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
1772  MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
1773  }
1774 
1775  bool HasFP = hasFP(MF);
1776  if (HasFP) {
1777  if (AFI->hasStackFrame())
1778  EstimatedStackSize += 4;
1779  } else {
1780  // If FP is not used, SP will be used to access arguments, so count the
1781  // size of arguments into the estimation.
1782  EstimatedStackSize += MaxFixedOffset;
1783  }
1784  EstimatedStackSize += 16; // For possible paddings.
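  // Rough worked example of this estimate: a function without a frame pointer,
  // an estimated 1200-byte frame, 6 GPR spills, 2 SPR spills and 24 bytes of
  // fixed argument objects gives 1200 + 4 * (6 + 2) + 24 + 16 = 1272 bytes,
  // which is compared against the addressing-mode limits computed below.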
1785 
1786  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
1787  if (AFI->isThumb1OnlyFunction()) {
1788  // For Thumb1, don't bother to iterate over the function. The only
1789  // instruction that requires an emergency spill slot is a store to a
1790  // frame index.
1791  //
1792  // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
1793  // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
1794  // a 5-bit unsigned immediate.
1795  //
1796  // We could try to check if the function actually contains a tSTRspi
1797  // that might need the spill slot, but it's not really important.
1798  // Functions with VLAs or extremely large call frames are rare, and
1799  // if a function is allocating more than 1KB of stack, an extra 4-byte
1800  // slot probably isn't relevant.
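  // (For reference, the limits below work out to (1 << 5) * 4 = 128 bytes for
  // bp/fp-relative accesses and (1 << 8) * 4 = 1024 bytes for sp-relative
  // accesses.)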
1801  if (RegInfo->hasBasePointer(MF))
1802  EstimatedRSStackSizeLimit = (1U << 5) * 4;
1803  else
1804  EstimatedRSStackSizeLimit = (1U << 8) * 4;
1805  EstimatedRSFixedSizeLimit = (1U << 5) * 4;
1806  } else {
1807  EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
1808  EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
1809  }
1810  // Final estimate of whether sp or bp-relative accesses might require
1811  // scavenging.
1812  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
1813 
1814  // If the stack pointer moves and we don't have a base pointer, the
1815  // estimate logic doesn't work. The actual offsets might be larger when
1816  // we're constructing a call frame, or we might need to use negative
1817  // offsets from fp.
1818  bool HasMovingSP = MFI.hasVarSizedObjects() ||
1819  (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
1820  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
1821 
1822  // If we have a frame pointer, we assume arguments will be accessed
1823  // relative to the frame pointer. Check whether fp-relative accesses to
1824  // arguments require scavenging.
1825  //
1826  // We could do slightly better on Thumb1; in some cases, an sp-relative
1827  // offset would be legal even though an fp-relative offset is not.
1828  int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
1829  bool HasLargeArgumentList =
1830  HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
1831 
1832  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
1833  HasLargeArgumentList;
1834  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
1835  << "; EstimatedStack" << EstimatedStackSize
1836  << "; EstimatedFPStack" << MaxFixedOffset - MaxFPOffset
1837  << "; BigFrameOffsets: " << BigFrameOffsets
1838  << "\n");
1839  if (BigFrameOffsets ||
1840  !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1841  AFI->setHasStackFrame(true);
1842 
1843  if (HasFP) {
1844  SavedRegs.set(FramePtr);
1845  // If the frame pointer is required by the ABI, also spill LR so that we
1846  // emit a complete frame record.
1847  if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
1848  SavedRegs.set(ARM::LR);
1849  LRSpilled = true;
1850  NumGPRSpills++;
1851  auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
1852  if (LRPos != UnspilledCS1GPRs.end())
1853  UnspilledCS1GPRs.erase(LRPos);
1854  }
1855  auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
1856  if (FPPos != UnspilledCS1GPRs.end())
1857  UnspilledCS1GPRs.erase(FPPos);
1858  NumGPRSpills++;
1859  if (FramePtr == ARM::R7)
1860  CS1Spilled = true;
1861  }
1862 
1863  // This is true when we inserted a spill for a callee-save GPR which is
1864  // not otherwise used by the function. This guarantees it is possible
1865  // to scavenge a register to hold the address of a stack slot. On Thumb1,
1866  // the register must be a valid operand to tSTRi, i.e. r4-r7. For other
1867  // subtargets, this is any GPR, i.e. r4-r11 or lr.
1868  //
1869  // If we don't insert a spill, we instead allocate an emergency spill
1870  // slot, which can be used by scavenging to spill an arbitrary register.
1871  //
1872  // We currently don't try to figure out whether any specific instruction
1873  // requires scavenging an additional register.
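  // Illustrative example: force-spilling an otherwise unused r5 gives the
  // scavenger a register that is also a valid tSTRi operand on Thumb1 (r4-r7),
  // whereas an unused r9 would only help on non-Thumb1 subtargets.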
1874  bool ExtraCSSpill = false;
1875 
1876  if (AFI->isThumb1OnlyFunction()) {
1877  // For Thumb1-only targets, we need some low registers when we save and
1878  // restore the high registers (which aren't allocatable, but could be
1879  // used by inline assembly) because the push/pop instructions cannot
1880  // access high registers. If necessary, we might need to push more low
1881  // registers to ensure that there is at least one free that can be used
1882  // for the saving & restoring, and preferably we should ensure that as
1883  // many as are needed are available so that fewer push/pop instructions
1884  // are required.
1885 
1886  // Low registers which are not currently pushed, but could be (r4-r7).
1887  SmallVector<unsigned, 4> AvailableRegs;
1888 
1889  // Unused argument registers (r0-r3) can be clobbered in the prologue for
1890  // free.
1891  int EntryRegDeficit = 0;
1892  for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
1893  if (!MF.getRegInfo().isLiveIn(Reg)) {
1894  --EntryRegDeficit;
1895  LLVM_DEBUG(dbgs()
1896  << printReg(Reg, TRI)
1897  << " is unused argument register, EntryRegDeficit = "
1898  << EntryRegDeficit << "\n");
1899  }
1900  }
1901 
1902  // Unused return registers can be clobbered in the epilogue for free.
1903  int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
1904  LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
1905  << " return regs used, ExitRegDeficit = "
1906  << ExitRegDeficit << "\n");
1907 
1908  int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
1909  LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
1910 
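      // Worked example: with r0 and r1 live in as arguments and a single
      // return register, EntryRegDeficit = -2 (r2 and r3 are free at entry)
      // and ExitRegDeficit = 1 - 4 = -3, so RegDeficit = max(-2, -3) = -2.
      // Each saved high register below adds one to the deficit and each saved
      // low register subtracts one.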
1911  // r4-r6 can be used in the prologue if they are pushed by the first push
1912  // instruction.
1913  for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
1914  if (SavedRegs.test(Reg)) {
1915  --RegDeficit;
1916  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1917  << " is saved low register, RegDeficit = "
1918  << RegDeficit << "\n");
1919  } else {
1920  AvailableRegs.push_back(Reg);
1921  LLVM_DEBUG(
1922  dbgs()
1923  << printReg(Reg, TRI)
1924  << " is non-saved low register, adding to AvailableRegs\n");
1925  }
1926  }
1927 
1928  // r7 can be used if it is not being used as the frame pointer.
1929  if (!HasFP) {
1930  if (SavedRegs.test(ARM::R7)) {
1931  --RegDeficit;
1932  LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
1933  << RegDeficit << "\n");
1934  } else {
1935  AvailableRegs.push_back(ARM::R7);
1936  LLVM_DEBUG(
1937  dbgs()
1938  << "%r7 is non-saved low register, adding to AvailableRegs\n");
1939  }
1940  }
1941 
1942  // Each of r8-r11 needs to be copied to a low register, then pushed.
1943  for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
1944  if (SavedRegs.test(Reg)) {
1945  ++RegDeficit;
1946  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1947  << " is saved high register, RegDeficit = "
1948  << RegDeficit << "\n");
1949  }
1950  }
1951 
1952  // LR can only be used by PUSH, not POP, and can't be used at all if the
1953  // llvm.returnaddress intrinsic is used. This is only worth doing if we
1954  // are more limited at function entry than exit.
1955  if ((EntryRegDeficit > ExitRegDeficit) &&
1956  !(MF.getRegInfo().isLiveIn(ARM::LR) &&
1957  MF.getFrameInfo().isReturnAddressTaken())) {
1958  if (SavedRegs.test(ARM::LR)) {
1959  --RegDeficit;
1960  LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
1961  << RegDeficit << "\n");
1962  } else {
1963  AvailableRegs.push_back(ARM::LR);
1964  LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
1965  }
1966  }
1967 
1968  // If there are more high registers that need pushing than low registers
1969  // available, push some more low registers so that we can use fewer push
1970  // instructions. This might not reduce RegDeficit all the way to zero,
1971  // because we can only guarantee that r4-r6 are available, but r8-r11 may
1972  // need saving.
1973  LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
1974  for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
1975  unsigned Reg = AvailableRegs.pop_back_val();
1976  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1977  << " to make up reg deficit\n");
1978  SavedRegs.set(Reg);
1979  NumGPRSpills++;
1980  CS1Spilled = true;
1981  assert(!MRI.isReserved(Reg) && "Should not be reserved");
1982  if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
1983  ExtraCSSpill = true;
1984  UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
1985  if (Reg == ARM::LR)
1986  LRSpilled = true;
1987  }
1988  LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
1989  << "\n");
1990  }
1991 
1992  // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
1993  // restore LR in that case.
1994  bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
1995 
1996  // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
1997  // spill LR as well so we can fold BX_RET into the register restore (LDM).
1998  if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
1999  SavedRegs.set(ARM::LR);
2000  NumGPRSpills++;
2001  SmallVectorImpl<unsigned>::iterator LRPos;
2002  LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2003  if (LRPos != UnspilledCS1GPRs.end())
2004  UnspilledCS1GPRs.erase(LRPos);
2005 
2006  ForceLRSpill = false;
2007  if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2008  !AFI->isThumb1OnlyFunction())
2009  ExtraCSSpill = true;
2010  }
2011 
2012  // If stack and double are 8-byte aligned and we are spilling an odd number
2013  // of GPRs, spill one extra callee save GPR so we won't have to pad between
2014  // the integer and double callee save areas.
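  // Example: pushing {r4, r5, r6, lr} (16 bytes) keeps the D-register save
  // area 8-byte aligned, whereas pushing only {r4, r5, lr} (12 bytes) would
  // require 4 bytes of padding, so spilling one more GPR is preferred.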
2015  LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2016  unsigned TargetAlign = getStackAlignment();
2017  if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
2018  if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2019  for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
2020  unsigned Reg = UnspilledCS1GPRs[i];
2021  // Don't spill a high register if the function is Thumb. In the case of
2022  // Windows on ARM, accept R11 (the frame pointer).
2023  if (!AFI->isThumbFunction() ||
2024  (STI.isTargetWindows() && Reg == ARM::R11) ||
2025  isARMLowRegister(Reg) ||
2026  (Reg == ARM::LR && !ExpensiveLRRestore)) {
2027  SavedRegs.set(Reg);
2028  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2029  << " to make up alignment\n");
2030  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2031  !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2032  ExtraCSSpill = true;
2033  break;
2034  }
2035  }
2036  } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2037  unsigned Reg = UnspilledCS2GPRs.front();
2038  SavedRegs.set(Reg);
2039  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2040  << " to make up alignment\n");
2041  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2042  ExtraCSSpill = true;
2043  }
2044  }
2045 
2046  // Estimate if we might need to scavenge a register at some point in order
2047  // to materialize a stack offset. If so, either spill one additional
2048  // callee-saved register or reserve a special spill slot to facilitate
2049  // register scavenging. Thumb1 needs a spill slot for stack pointer
2050  // adjustments also, even when the frame itself is small.
2051  if (BigFrameOffsets && !ExtraCSSpill) {
2052  // If any non-reserved CS register isn't spilled, just spill one or two
2053  // extra. That should take care of it!
2054  unsigned NumExtras = TargetAlign / 4;
2055  SmallVector<unsigned, 2> Extras;
2056  while (NumExtras && !UnspilledCS1GPRs.empty()) {
2057  unsigned Reg = UnspilledCS1GPRs.back();
2058  UnspilledCS1GPRs.pop_back();
2059  if (!MRI.isReserved(Reg) &&
2060  (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2061  Extras.push_back(Reg);
2062  NumExtras--;
2063  }
2064  }
2065  // For non-Thumb1 functions, also check for hi-reg CS registers
2066  if (!AFI->isThumb1OnlyFunction()) {
2067  while (NumExtras && !UnspilledCS2GPRs.empty()) {
2068  unsigned Reg = UnspilledCS2GPRs.back();
2069  UnspilledCS2GPRs.pop_back();
2070  if (!MRI.isReserved(Reg)) {
2071  Extras.push_back(Reg);
2072  NumExtras--;
2073  }
2074  }
2075  }
2076  if (NumExtras == 0) {
2077  for (unsigned Reg : Extras) {
2078  SavedRegs.set(Reg);
2079  if (!MRI.isPhysRegUsed(Reg))
2080  ExtraCSSpill = true;
2081  }
2082  }
2083  if (!ExtraCSSpill) {
2084  // Reserve a slot closest to SP or frame pointer.
2085  assert(RS && "Register scavenging not provided");
2086  LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2087  const TargetRegisterClass &RC = ARM::GPRRegClass;
2088  unsigned Size = TRI->getSpillSize(RC);
2089  unsigned Align = TRI->getSpillAlignment(RC);
2090  RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2091  }
2092  }
2093  }
2094 
2095  if (ForceLRSpill) {
2096  SavedRegs.set(ARM::LR);
2097  AFI->setLRIsSpilledForFarJump(true);
2098  }
2099  AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2100 }
2101 
2102 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2103  MachineFunction &MF, MachineBasicBlock &MBB,
2104  MachineBasicBlock::iterator I) const {
2105  const ARMBaseInstrInfo &TII =
2106  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2107  if (!hasReservedCallFrame(MF)) {
2108  // If we have alloca, convert as follows:
2109  // ADJCALLSTACKDOWN -> sub sp, sp, amount
2110  // ADJCALLSTACKUP   -> add sp, sp, amount
2111  MachineInstr &Old = *I;
2112  DebugLoc dl = Old.getDebugLoc();
2113  unsigned Amount = TII.getFrameSize(Old);
2114  if (Amount != 0) {
2115  // We need to keep the stack aligned properly. To do this, we round the
2116  // amount of space needed for the outgoing arguments up to the next
2117  // alignment boundary.
2118  Amount = alignSPAdjust(Amount);
2119 
2120  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2121  assert(!AFI->isThumb1OnlyFunction() &&
2122  "This eliminateCallFramePseudoInstr does not support Thumb1!");
2123  bool isARM = !AFI->isThumbFunction();
2124 
2125  // Replace the pseudo instruction with a new instruction...
2126  unsigned Opc = Old.getOpcode();
2127  int PIdx = Old.findFirstPredOperandIdx();
2128  ARMCC::CondCodes Pred =
2129  (PIdx == -1) ? ARMCC::AL
2130  : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
2131  unsigned PredReg = TII.getFramePred(Old);
2132  if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2133  emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2134  Pred, PredReg);
2135  } else {
2136  assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2137  emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2138  Pred, PredReg);
2139  }
2140  }
2141  }
2142  return MBB.erase(I);
2143 }
2144 
2145 /// Get the minimum constant for ARM that is greater than or equal to the
2146 /// argument. In ARM, constants can have any value that can be produced by
2147 /// rotating an 8-bit value to the right by an even number of bits within a
2148 /// 32-bit word.
2149 static uint32_t alignToARMConstant(uint32_t Value) {
2150  unsigned Shifted = 0;
2151 
2152  if (Value == 0)
2153  return 0;
2154 
2155  while (!(Value & 0xC0000000)) {
2156  Value = Value << 2;
2157  Shifted += 2;
2158  }
2159 
2160  bool Carry = (Value & 0x00FFFFFF);
2161  Value = ((Value & 0xFF000000) >> 24) + Carry;
2162 
2163  if (Value & 0x0000100)
2164  Value = Value & 0x000001FC;
2165 
2166  if (Shifted > 24)
2167  Value = Value >> (Shifted - 24);
2168  else
2169  Value = Value << (24 - Shifted);
2170 
2171  return Value;
2172 }
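// Worked example for the helper above: alignToARMConstant(257) returns 260
// (0x104). 257 (0x101) spans nine bit positions and cannot be produced by
// rotating an 8-bit value by an even amount, while 260 is 0x41 rotated right
// by 30 bits.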
2173 
2174 // The stack limit in the TCB is set to this many bytes above the actual
2175 // stack limit.
2176 static const uint64_t kSplitStackAvailable = 256;
2177 
2178 // Adjust the function prologue to enable split stacks. This currently only
2179 // supports Android and Linux.
2180 //
2181 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2182 // must be well defined in order to allow for consistent implementations of the
2183 // __morestack helper function. The ABI is also not a normal ABI in that it
2184 // doesn't follow the normal calling conventions because this allows the
2185 // prologue of each function to be optimized further.
2186 //
2187 // Currently, the ABI looks like (when calling __morestack)
2188 //
2189 // * r4 holds the minimum stack size requested for this function call
2190 // * r5 holds the stack size of the arguments to the function
2191 // * the beginning of the function is 3 instructions after the call to
2192 // __morestack
2193 //
2194 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2195 // place the arguments on to the new stack, and the 3-instruction knowledge to
2196 // jump directly to the body of the function when working on the new stack.
2197 //
2198 // An old (and possibly no longer compatible) implementation of __morestack for
2199 // ARM can be found at [1].
2200 //
2201 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
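//
// For orientation, a rough sketch of the ARM-mode check this function builds
// (illustrative only; the exact instructions are emitted below via BuildMI):
//
//       push {r4, r5}                   @ save the scratch registers
//       sub  r5, sp, #AlignedStackSize  @ or "mov r5, sp" for frames < 256 bytes
//       mrc  p15, #0, r4, c13, c0, #3   @ r4 = TLS base
//       ldr  r4, [r4, #4 * TlsOffset]   @ r4 = stack limit
//       cmp  r4, r5
//       blo  .Lenough                   @ enough stack: skip the call
//       mov  r4, #AlignedStackSize      @ arguments for __morestack
//       mov  r5, #ArgumentStackSize
//       push {lr}
//       bl   __morestack                @ allocates a new stack; the body starts
//       pop  {lr}                       @   3 instructions after this call
//       pop  {r4, r5}
//       bx   lr                         @ return to the caller once __morestack is done
//   .Lenough:
//       pop  {r4, r5}                   @ fall through to the normal prologue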
2202 void ARMFrameLowering::adjustForSegmentedStacks(
2203  MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2204  unsigned Opcode;
2205  unsigned CFIIndex;
2206  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2207  bool Thumb = ST->isThumb();
2208 
2209  // Sadly, this currently doesn't support varargs or platforms other than
2210  // Android/Linux. Note that both Thumb1 and Thumb2 are supported on Android/Linux.
2211  if (MF.getFunction().isVarArg())
2212  report_fatal_error("Segmented stacks do not support vararg functions.");
2213  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2214  report_fatal_error("Segmented stacks not supported on this platform.");
2215 
2216  MachineFrameInfo &MFI = MF.getFrameInfo();
2217  MachineModuleInfo &MMI = MF.getMMI();
2218  MCContext &Context = MMI.getContext();
2219  const MCRegisterInfo *MRI = Context.getRegisterInfo();
2220  const ARMBaseInstrInfo &TII =
2221  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2222  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
2223  DebugLoc DL;
2224 
2225  uint64_t StackSize = MFI.getStackSize();
2226 
2227  // Do not generate a prologue for leaf functions with a stack of size zero.
2228  // For non-leaf functions we have to allow for the possibility that the
2229  // call is to a non-split function, as in PR37807. This function could also
2230  // take the address of a non-split function. When the linker tries to adjust
2231  // its non-existent prologue, it would fail with an error. Mark the object
2232  // file so that such failures are not errors. See this Go language bug-report
2233  // https://go-review.googlesource.com/c/go/+/148819/
2234  if (StackSize == 0 && !MFI.hasTailCall()) {
2235  MF.getMMI().setHasNosplitStack(true);
2236  return;
2237  }
2238 
2239  // Use R4 and R5 as scratch registers.
2240  // We save R4 and R5 before use and restore them before leaving the function.
2241  unsigned ScratchReg0 = ARM::R4;
2242  unsigned ScratchReg1 = ARM::R5;
2243  uint64_t AlignedStackSize;
2244 
2245  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2246  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2247  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
2248  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
2249  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
2250 
2251  // Grab every block that reaches PrologueMBB so we can update its liveness as well.
2252  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2253  SmallVector<MachineBasicBlock *, 8> WalkList;
2254  WalkList.push_back(&PrologueMBB);
2255 
2256  do {
2257  MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2258  for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2259  if (BeforePrologueRegion.insert(PredBB).second)
2260  WalkList.push_back(PredBB);
2261  }
2262  } while (!WalkList.empty());
2263 
2264  // The order in that list is important.
2265  // The blocks will all be inserted before PrologueMBB using that order.
2266  // Therefore the block that should appear first in the CFG should appear
2267  // first in the list.
2268  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2269  PostStackMBB};
2270 
2271  for (MachineBasicBlock *B : AddedBlocks)
2272  BeforePrologueRegion.insert(B);
2273 
2274  for (const auto &LI : PrologueMBB.liveins()) {
2275  for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2276  PredBB->addLiveIn(LI);
2277  }
2278 
2279  // Remove the newly added blocks from the list, since we know
2280  // we do not have to do the following updates for them.
2281  for (MachineBasicBlock *B : AddedBlocks) {
2282  BeforePrologueRegion.erase(B);
2283  MF.insert(PrologueMBB.getIterator(), B);
2284  }
2285 
2286  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2287  // Make sure the LiveIns are still sorted and unique.
2288  MBB->sortUniqueLiveIns();
2289  // Replace the edges to PrologueMBB by edges to the sequences
2290  // we are about to add.
2291  MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2292  }
2293 
2294  // The required stack size, rounded up so it can be encoded as an ARM immediate.
2295  AlignedStackSize = alignToARMConstant(StackSize);
2296 
2297  // When the frame size is less than 256 we just compare the stack
2298  // boundary directly to the value of the stack pointer, per gcc.
2299  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
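  // Example: a 200-byte frame fits within the kSplitStackAvailable slack, so
  // SR1 is just a copy of sp; a 4096-byte frame instead computes
  // SR1 = sp - 4096 before the comparison against the stack limit.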
2300 
2301  // We will use two of the callee save registers as scratch registers so we
2302  // need to save those registers onto the stack.
2303  // We will use SR0 to hold stack limit and SR1 to hold the stack size
2304  // requested and arguments for __morestack().
2305  // SR0: Scratch Register #0
2306  // SR1: Scratch Register #1
2307  // push {SR0, SR1}
2308  if (Thumb) {
2309  BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
2310  .add(predOps(ARMCC::AL))
2311  .addReg(ScratchReg0)
2312  .addReg(ScratchReg1);
2313  } else {
2314  BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
2315  .addReg(ARM::SP, RegState::Define)
2316  .addReg(ARM::SP)
2317  .add(predOps(ARMCC::AL))
2318  .addReg(ScratchReg0)
2319  .addReg(ScratchReg1);
2320  }
2321 
2322  // Emit the relevant DWARF information about the change in stack pointer as
2323  // well as where to find both r4 and r5 (the callee-save registers)
2324  CFIIndex =
2325  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
2326  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2327  .addCFIIndex(CFIIndex);
2328  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2329  nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
2330  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2331  .addCFIIndex(CFIIndex);
2332  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2333  nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
2334  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2335  .addCFIIndex(CFIIndex);
2336 
2337  // mov SR1, sp
2338  if (Thumb) {
2339  BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
2340  .addReg(ARM::SP)
2341  .add(predOps(ARMCC::AL));
2342  } else if (CompareStackPointer) {
2343  BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
2344  .addReg(ARM::SP)
2345  .add(predOps(ARMCC::AL))
2346  .add(condCodeOp());
2347  }
2348 
2349  // sub SR1, sp, #StackSize
2350  if (!CompareStackPointer && Thumb) {
2351  BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
2352  .add(condCodeOp())
2353  .addReg(ScratchReg1)
2354  .addImm(AlignedStackSize)
2355  .add(predOps(ARMCC::AL));
2356  } else if (!CompareStackPointer) {
2357  BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
2358  .addReg(ARM::SP)
2359  .addImm(AlignedStackSize)
2360  .add(predOps(ARMCC::AL))
2361  .add(condCodeOp());
2362  }
2363 
2364  if (Thumb && ST->isThumb1Only()) {
2365  unsigned PCLabelId = ARMFI->createPICLabelUId();
2366  ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
2367  MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
2368  MachineConstantPool *MCP = MF.getConstantPool();
2369  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
2370 
2371  // ldr SR0, [pc, offset(STACK_LIMIT)]
2372  BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
2373  .addConstantPoolIndex(CPI)
2374  .add(predOps(ARMCC::AL));
2375 
2376  // ldr SR0, [SR0]
2377  BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
2378  .addReg(ScratchReg0)
2379  .addImm(0)
2380  .add(predOps(ARMCC::AL));
2381  } else {
2382  // Get TLS base address from the coprocessor
2383  // mrc p15, #0, SR0, c13, c0, #3
2384  BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
2385  .addImm(15)
2386  .addImm(0)
2387  .addImm(13)
2388  .addImm(0)
2389  .addImm(3)
2390  .add(predOps(ARMCC::AL));
2391 
2392  // Use the last TLS slot on Android and a private field of the TCB on Linux.
2393  assert(ST->isTargetAndroid() || ST->isTargetLinux());
2394  unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
2395 
2396  // Get the stack limit from the right offset
2397  // ldr SR0, [sr0, #4 * TlsOffset]
2398  BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
2399  .addReg(ScratchReg0)
2400  .addImm(4 * TlsOffset)
2401  .add(predOps(ARMCC::AL));
2402  }
2403 
2404  // Compare stack limit with stack size requested.
2405  // cmp SR0, SR1
2406  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
2407  BuildMI(GetMBB, DL, TII.get(Opcode))
2408  .addReg(ScratchReg0)
2409  .addReg(ScratchReg1)
2410  .add(predOps(ARMCC::AL));
2411 
2412  // This jump is taken if StackLimit < SP - stack required.
2413  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
2414  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
2415  .addImm(ARMCC::LO)
2416  .addReg(ARM::CPSR);
2417 
2418 
2419  // Calling __morestack(StackSize, Size of stack arguments).
2420  // __morestack knows that the stack size requested is in SR0 (r4)
2421  // and the size of the stack arguments is in SR1 (r5).
2422 
2423  // Pass the first argument to __morestack in Scratch Register #0:
2424  // the amount of stack required.
2425  if (Thumb) {
2426  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
2427  .add(condCodeOp())
2428  .addImm(AlignedStackSize)
2429  .add(predOps(ARMCC::AL));
2430  } else {
2431  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
2432  .addImm(AlignedStackSize)
2433  .add(predOps(ARMCC::AL))
2434  .add(condCodeOp());
2435  }
2436  // Pass the second argument to __morestack in Scratch Register #1:
2437  // the amount of stack consumed to save the function arguments.
2438  if (Thumb) {
2439  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
2440  .add(condCodeOp())
2441  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2442  .add(predOps(ARMCC::AL));
2443  } else {
2444  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
2445  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2446  .add(predOps(ARMCC::AL))
2447  .add(condCodeOp());
2448  }
2449 
2450  // push {lr} - Save return address of this function.
2451  if (Thumb) {
2452  BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
2453  .add(predOps(ARMCC::AL))
2454  .addReg(ARM::LR);
2455  } else {
2456  BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
2457  .addReg(ARM::SP, RegState::Define)
2458  .addReg(ARM::SP)
2459  .add(predOps(ARMCC::AL))
2460  .addReg(ARM::LR);
2461  }
2462 
2463  // Emit the DWARF info about the change in stack as well as where to find the
2464  // previous link register
2465  CFIIndex =
2466  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
2467  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2468  .addCFIIndex(CFIIndex);
2469  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2470  nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
2471  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2472  .addCFIIndex(CFIIndex);
2473 
2474  // Call __morestack().
2475  if (Thumb) {
2476  BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
2477  .add(predOps(ARMCC::AL))
2478  .addExternalSymbol("__morestack");
2479  } else {
2480  BuildMI(AllocMBB, DL, TII.get(ARM::BL))
2481  .addExternalSymbol("__morestack");
2482  }
2483 
2484  // pop {lr} - Restore the return address of the original function.
2485  if (Thumb) {
2486  if (ST->isThumb1Only()) {
2487  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2488  .add(predOps(ARMCC::AL))
2489  .addReg(ScratchReg0);
2490  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
2491  .addReg(ScratchReg0)
2492  .add(predOps(ARMCC::AL));
2493  } else {
2494  BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
2495  .addReg(ARM::LR, RegState::Define)
2496  .addReg(ARM::SP, RegState::Define)
2497  .addReg(ARM::SP)
2498  .addImm(4)
2499  .add(predOps(ARMCC::AL));
2500  }
2501  } else {
2502  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2503  .addReg(ARM::SP, RegState::Define)
2504  .addReg(ARM::SP)
2505  .add(predOps(ARMCC::AL))
2506  .addReg(ARM::LR);
2507  }
2508 
2509  // Restore SR0 and SR1 in case __morestack() was called.
2510  // __morestack() will skip the PostStackMBB block, so we need to restore the
2511  // scratch registers here.
2512  // pop {SR0, SR1}
2513  if (Thumb) {
2514  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2515  .add(predOps(ARMCC::AL))
2516  .addReg(ScratchReg0)
2517  .addReg(ScratchReg1);
2518  } else {
2519  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2520  .addReg(ARM::SP, RegState::Define)
2521  .addReg(ARM::SP)
2522  .add(predOps(ARMCC::AL))
2523  .addReg(ScratchReg0)
2524  .addReg(ScratchReg1);
2525  }
2526 
2527  // Update the CFA offset now that we've popped
2528  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2529  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2530  .addCFIIndex(CFIIndex);
2531 
2532  // Return from this function.
2533  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
2534 
2535  // Restore SR0 and SR1 in case __morestack() was not called.
2536  // pop {SR0, SR1}
2537  if (Thumb) {
2538  BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
2539  .add(predOps(ARMCC::AL))
2540  .addReg(ScratchReg0)
2541  .addReg(ScratchReg1);
2542  } else {
2543  BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
2544  .addReg(ARM::SP, RegState::Define)
2545  .addReg(ARM::SP)
2546  .add(predOps(ARMCC::AL))
2547  .addReg(ScratchReg0)
2548  .addReg(ScratchReg1);
2549  }
2550 
2551  // Update the CFA offset now that we've popped
2552  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2553  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2554  .addCFIIndex(CFIIndex);
2555 
2556  // Tell debuggers that r4 and r5 are now the same as they were in the
2557  // previous function, that they're the "Same Value".
2558  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2559  nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
2560  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2561  .addCFIIndex(CFIIndex);
2562  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2563  nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
2564  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2565  .addCFIIndex(CFIIndex);
2566 
2567  // Organizing MBB lists
2568  PostStackMBB->addSuccessor(&PrologueMBB);
2569 
2570  AllocMBB->addSuccessor(PostStackMBB);
2571 
2572  GetMBB->addSuccessor(PostStackMBB);
2573  GetMBB->addSuccessor(AllocMBB);
2574 
2575  McrMBB->addSuccessor(GetMBB);
2576 
2577  PrevStackMBB->addSuccessor(McrMBB);
2578 
2579 #ifdef EXPENSIVE_CHECKS
2580  MF.verify();
2581 #endif
2582 }
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:176
const MachineInstrBuilder & add(const MachineOperand &MO) const
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BitVector & set()
Definition: BitVector.h:397
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
#define R4(n)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
int findFirstPredOperandIdx() const
Find the index of the first operand in the operand list that is used to represent the predicate...
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
bool isThumb() const
Definition: ARMSubtarget.h:749
This class represents lattice values for constants.
Definition: AllocatorList.h:23
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:385
ARMConstantPoolValue - ARM specific constantpool value.
LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset)
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:507
unsigned Reg
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
Register getFrameRegister(const MachineFunction &MF) const override
bool hasV7Ops() const
Definition: ARMSubtarget.h:570
bool test(unsigned Idx) const
Definition: BitVector.h:501
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
unsigned second
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
F(f)
MachineModuleInfo & getMMI() const
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
bool isThumb1Only() const
Definition: ARMSubtarget.h:751
#define R2(n)
void setGPRCalleeSavedArea2Offset(unsigned o)
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static MCCFIInstruction createDefCfaOffset(MCSymbol *L, int Offset)
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:494
static bool isThumb(const MCSubtargetInfo &STI)
unsigned getSpillSize(const TargetRegisterClass &RC) const
Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...
return AArch64::GPR64RegClass contains(Reg)
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:521
static const uint64_t kSplitStackAvailable
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:119
bool isTargetELF() const
Definition: ARMSubtarget.h:694
unsigned getSpillAlignment(const TargetRegisterClass &RC) const
Return the minimum required alignment in bytes for a spill slot for a register of this class...
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to &#39;Old&#39;, change the code and CFG so that it branches to &#39;N...
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
unsigned getFramePred(const MachineInstr &MI) const
Returns predicate register associated with the given frame instruction.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:414
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required, we reserve argument space for call sites in the function immediately on entry to the current function.
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
This file contains the simple types necessary to represent the attributes associated with functions a...
static int sizeOfSPAdjustment(const MachineInstr &MI)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
int getDwarfRegNum(unsigned RegNum, bool isEH) const
Map a target register to an equivalent dwarf register number.
bool keepFramePointer(const MachineFunction &MF) const override
Return true if the target wants to keep the frame pointer regardless of the function attribute "frame...
void setDPRCalleeSavedAreaOffset(unsigned o)
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
unsigned getArgRegsSaveSize() const
void setFramePtrSpillOffset(unsigned o)
Context object for machine code objects.
Definition: MCContext.h:64
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
unsigned getDPRCalleeSavedAreaSize() const
bool splitFramePushPop(const MachineFunction &MF) const
Returns true if the frame setup is split into two separate pushes (first r0-r7,lr then r8-r11)...
Definition: ARMSubtarget.h:771
void setDPRCalleeSavedGapSize(unsigned s)
unsigned getNumAlignedDPRCS2Regs() const
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
const MCContext & getContext() const
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment. ...
bool hasV6T2Ops() const
Definition: ARMSubtarget.h:569
virtual const TargetInstrInfo * getInstrInfo() const
static bool isARMArea1Register(unsigned Reg, bool isIOS)
isARMArea1Register - Returns true if the register is a low register (r0-r7) or a stack/pc register th...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1258
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
TargetInstrInfo - Interface to description of machine instruction set.
unsigned getDefRegState(bool B)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register)
.cfi_same_value Current value of Register is the same as in the previous frame.
Definition: MCDwarf.h:552
static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs)
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, int SPAdj) const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
This file declares the machine register scavenger class.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, std::vector< CalleeSavedInfo > &CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static MCCFIInstruction createDefCfa(MCSymbol *L, unsigned Register, int Offset)
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it...
Definition: MCDwarf.h:480
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool cannotEliminateFrame(const MachineFunction &MF) const
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register)
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:487
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Returns the size of the specified MachineInstr.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
unsigned getGPRCalleeSavedArea1Size() const
int getStackProtectorIndex() const
Return the index for the stack protector object.
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool hasBasePointer(const MachineFunction &MF) const
void setGPRCalleeSavedArea2Size(unsigned s)
static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:798
unsigned getReturnRegsCount() const
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, const std::vector< CalleeSavedInfo > &CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8...
self_iterator getIterator()
Definition: ilist_node.h:81
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
iterator_range< pred_iterator > predecessors()
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1220
const ARMFrameLowering * getFrameLowering() const override
Definition: ARMSubtarget.h:529
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materializea des...
iterator erase(const_iterator CI)
Definition: SmallVector.h:434
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
size_t size() const
Definition: SmallVector.h:52
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1213
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned first
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1122
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:478
int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
unsigned getDPRCalleeSavedGapSize() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, const std::vector< CalleeSavedInfo > &CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8...
#define R6(n)
void setGPRCalleeSavedArea1Size(unsigned s)
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
bool isTargetLinux() const
Definition: ARMSubtarget.h:688
static uint64_t add(uint64_t LeftOp, uint64_t RightOp)
Definition: FileCheck.cpp:218
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static bool isARMArea2Register(unsigned Reg, bool isIOS)
bool isTargetAndroid() const
Definition: ARMSubtarget.h:735
Information about stack frame layout on the target.
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const unsigned Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
void setOffsetAdjustment(int Adj)
Set the correction for frame offsets.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
CodeModel::Model getCodeModel() const
Returns the code model.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
ARMFrameLowering(const ARMSubtarget &sti)
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
unsigned getReturnOpcode() const
Returns the correct return opcode for the current feature set.
Definition: ARMSubtarget.h:841
unsigned getArgumentStackSize() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static unsigned estimateRSStackSizeLimit(MachineFunction &MF, const TargetFrameLowering *TFI)
estimateRSStackSizeLimit - Look at each instruction that references stack frames and return the stack...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
static bool isPopOpcode(int Opc)
bool isLiveIn(unsigned Reg) const
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
uint16_t getEncodingValue(unsigned RegNo) const
Returns the encoding for RegNo.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
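A hedged sketch of how addReg and addImm are chained on a MachineInstrBuilder when emitting a predicated ARM instruction; the opcode, registers, offset, and helper name are placeholders, not taken from this file:

#include "ARMBaseInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

// Hypothetical helper: emit "add.w r7, sp, #Offset" as a frame-setup instruction.
static void emitFrameAdd(llvm::MachineBasicBlock &MBB,
                         llvm::MachineBasicBlock::iterator MBBI,
                         const llvm::DebugLoc &dl,
                         const llvm::ARMBaseInstrInfo &TII, int Offset) {
  using namespace llvm;
  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDri), ARM::R7)
      .addReg(ARM::SP)          // register operand
      .addImm(Offset)           // immediate operand
      .add(predOps(ARMCC::AL))  // predicate: always execute
      .add(condCodeOp())        // no optional CPSR def
      .setMIFlags(MachineInstr::FrameSetup);
}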
unsigned getGPRCalleeSavedArea2Size() const
void setGPRCalleeSavedArea1Offset(unsigned o)
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg...
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:223
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to the callee saved info vector for the current function.
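A brief sketch of walking the callee-saved info, assuming MFI is the function's MachineFrameInfo; not code from this file:

// Each entry pairs a callee-saved register with the slot it was spilled to.
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
  unsigned Reg = Info.getReg();      // the saved register
  int FrameIdx = Info.getFrameIdx(); // its stack slot
  (void)Reg;
  (void)FrameIdx;
}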
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
TargetOptions Options
#define I(x, y, z)
Definition: MD5.cpp:58
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:533
virtual const TargetFrameLowering * getFrameLowering() const
void setDPRCalleeSavedAreaSize(unsigned s)
int getOffsetAdjustment() const
Return the correction for frame offsets.
uint32_t Size
Definition: Profile.cpp:46
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new register operand (virtual or physical).
bool hasV5TOps() const
Definition: ARMSubtarget.h:564
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
iterator_range< livein_iterator > liveins() const
static bool isARMArea3Register(unsigned Reg, bool isIOS)
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:312
bool needsStackRealignment(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal cal...
LLVM Value Representation.
Definition: Value.h:72
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector< CalleeSavedInfo > &CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
unsigned getFramePtrSpillOffset() const
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
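A minimal sketch of LLVM_FALLTHROUGH marking an intentional switch fall-through; the helper and the chosen opcodes are illustrative, not taken from this file:

// Hypothetical classifier: a pop-and-return is also a pop.
static void classifyPop(int Opc, bool &IsPop, bool &IsReturn) {
  switch (Opc) {
  case ARM::tPOP_RET:
    IsReturn = true;
    LLVM_FALLTHROUGH; // intentionally also take the tPOP path
  case ARM::tPOP:
    IsPop = true;
    break;
  default:
    break;
  }
}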
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
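A hedged sketch of reading a string-valued function attribute, in the style used for the "stack-probe-size" attribute; F is assumed to be a const Function & and the default value is an assumption:

unsigned StackProbeSize = 4096; // assumed default
if (F.hasFnAttribute("stack-probe-size"))
  F.getFnAttribute("stack-probe-size")
      .getValueAsString()               // the attribute's string payload
      .getAsInteger(0, StackProbeSize); // parse, radix auto-detected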
bool isTargetWindows() const
Definition: ARMSubtarget.h:691
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
bool addRegisterKilled(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
IRTranslator LLVM IR -> MI
static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII, const MCPhysReg *CSRegs)
BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const ARMSubtarget & STI
Register getReg() const
getReg - Returns the register number.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
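A one-line sketch combining LLVM_DEBUG with dbgs(); the message text and NumBytes variable are placeholders. The output only appears in assertion-enabled builds when -debug-only=arm-frame-lowering (this file's DEBUG_TYPE) is passed:

LLVM_DEBUG(dbgs() << "adjusting SP by " << NumBytes << " bytes\n");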
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
unsigned getReg() const
bool hasTailCall() const
Returns true if the function contains a tail call.
void setObjectAlignment(int ObjectIdx, unsigned Align)
setObjectAlignment - Change the alignment of the specified stack object.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
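An illustrative call site for emitSPUpdate (all arguments assumed to be in scope, values are placeholders): drop SP by NumBytes during the prologue, tag the resulting instruction as frame setup, and leave the default AL predicate:

emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup);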
bool isReserved(unsigned PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
This class contains meta information specific to a module.