ARMFrameLowering.cpp (LLVM 9.0.0svn)
1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the ARM implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMFrameLowering.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMConstantPoolValue.h"
17 #include "ARMMachineFunctionInfo.h"
18 #include "ARMSubtarget.h"
21 #include "Utils/ARMBaseInfo.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/MC/MCContext.h"
46 #include "llvm/MC/MCDwarf.h"
47 #include "llvm/MC/MCInstrDesc.h"
48 #include "llvm/MC/MCRegisterInfo.h"
49 #include "llvm/Support/CodeGen.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstddef>
61 #include <cstdint>
62 #include <iterator>
63 #include <utility>
64 #include <vector>
65 
66 #define DEBUG_TYPE "arm-frame-lowering"
67 
68 using namespace llvm;
69 
70 static cl::opt<bool>
71 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
72  cl::desc("Align ARM NEON spills in prolog and epilog"));
73 
74 static MachineBasicBlock::iterator
75 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
76  unsigned NumAlignedDPRCS2Regs);
77 
78 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
79  : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
80  STI(sti) {}
81 
82 bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
83  // iOS always has a FP for backtracking, force other targets to keep their FP
84  // when doing FastISel. The emitted code is currently superior, and in cases
85  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
86  return MF.getSubtarget<ARMSubtarget>().useFastISel();
87 }
88 
89 /// Returns true if the target can safely skip saving callee-saved registers
90 /// for noreturn nounwind functions.
91 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
92  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
93  MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
94  !MF.getFunction().hasFnAttribute(Attribute::UWTable));
95 
96  // The frame pointer and link register are not treated as normal CSRs, so we
97  // can always skip CSR saves for non-returning functions.
98  return true;
99 }
100 
101 /// hasFP - Return true if the specified function should have a dedicated frame
102 /// pointer register. This is true if the function has variable sized allocas
103 /// or if frame pointer elimination is disabled.
104 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
105  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
106  const MachineFrameInfo &MFI = MF.getFrameInfo();
107 
108  // ABI-required frame pointer.
109  if (MF.getTarget().Options.DisableFramePointerElim(MF))
110  return true;
111 
112  // Frame pointer required for use within this function.
113  return (RegInfo->needsStackRealignment(MF) ||
114  MFI.hasVarSizedObjects() ||
115  MFI.isFrameAddressTaken());
116 }
117 
118 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
119 /// not required, we reserve argument space for call sites in the function
120 /// immediately on entry to the current function. This eliminates the need for
121 /// add/sub sp brackets around call sites. Returns true if the call frame is
122 /// included as part of the stack frame.
123 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
124  const MachineFrameInfo &MFI = MF.getFrameInfo();
125  unsigned CFSize = MFI.getMaxCallFrameSize();
126  // It's not always a good idea to include the call frame as part of the
127  // stack frame. ARM (especially Thumb) has only a small immediate offset
128  // range with which to address the stack frame, so a large call frame can
129  // cause poor codegen and may even make it impossible to scavenge a register.
130  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
131  return false;
132 
133  return !MFI.hasVarSizedObjects();
134 }
135 
136 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
137 /// call frame pseudos can be simplified. Unlike most targets, having a FP
138 /// is not sufficient here since we still may reference some objects via SP
139 /// even when FP is available in Thumb2 mode.
140 bool
141 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
142  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
143 }
144 
145 static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
146  const MCPhysReg *CSRegs) {
147  // Integer spill area is handled with "pop".
148  if (isPopOpcode(MI.getOpcode())) {
149  // The first two operands are predicates. The last two are
150  // imp-def and imp-use of SP. Check everything in between.
151  for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
152  if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
153  return false;
154  return true;
155  }
156  if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
157  MI.getOpcode() == ARM::LDR_POST_REG ||
158  MI.getOpcode() == ARM::t2LDR_POST) &&
159  isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
160  MI.getOperand(1).getReg() == ARM::SP)
161  return true;
162 
163  return false;
164 }
165 
166 static void emitRegPlusImmediate(
167  bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
168  const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
169  unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
170  ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
171  if (isARM)
172  emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
173  Pred, PredReg, TII, MIFlags);
174  else
175  emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
176  Pred, PredReg, TII, MIFlags);
177 }
178 
179 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
180  MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
181  const ARMBaseInstrInfo &TII, int NumBytes,
182  unsigned MIFlags = MachineInstr::NoFlags,
183  ARMCC::CondCodes Pred = ARMCC::AL,
184  unsigned PredReg = 0) {
185  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
186  MIFlags, Pred, PredReg);
187 }
188 
189 static int sizeOfSPAdjustment(const MachineInstr &MI) {
190  int RegSize;
191  switch (MI.getOpcode()) {
192  case ARM::VSTMDDB_UPD:
193  RegSize = 8;
194  break;
195  case ARM::STMDB_UPD:
196  case ARM::t2STMDB_UPD:
197  RegSize = 4;
198  break;
199  case ARM::t2STR_PRE:
200  case ARM::STR_PRE_IMM:
201  return 4;
202  default:
203  llvm_unreachable("Unknown push or pop like instruction");
204  }
205 
206  int count = 0;
207  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
208  // pred) so the list starts at 4.
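  // For example, "vpush {d8-d11}" (VSTMDDB_UPD) has 4 fixed operands plus 4
  // register operands, so this returns 4 * 8 = 32 bytes.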
209  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
210  count += RegSize;
211  return count;
212 }
213 
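// The helper below decides whether a Windows-on-ARM frame is large enough to
// require a __chkstk stack probe: the default threshold is 4096 bytes (4080
// when a stack-protector slot is present), and it can be overridden with the
// "stack-probe-size" function attribute.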
214 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
215  size_t StackSizeInBytes) {
216  const MachineFrameInfo &MFI = MF.getFrameInfo();
217  const Function &F = MF.getFunction();
218  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
219  if (F.hasFnAttribute("stack-probe-size"))
220  F.getFnAttribute("stack-probe-size")
221  .getValueAsString()
222  .getAsInteger(0, StackProbeSize);
223  return (StackSizeInBytes >= StackProbeSize) &&
224  !F.hasFnAttribute("no-stack-arg-probe");
225 }
226 
227 namespace {
228 
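// Bookkeeping for prologue instructions that adjust SP, so that matching
// .cfi_def_cfa_offset directives can be emitted once the prologue layout is
// final (see emitDefCFAOffsets below).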
229 struct StackAdjustingInsts {
230  struct InstInfo {
231  MachineBasicBlock::iterator I;
232  unsigned SPAdjust;
233  bool BeforeFPSet;
234  };
235 
236  SmallVector<InstInfo, 4> Insts;
237 
238  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
239  bool BeforeFPSet = false) {
240  InstInfo Info = {I, SPAdjust, BeforeFPSet};
241  Insts.push_back(Info);
242  }
243 
244  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
245  auto Info =
246  llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
247  assert(Info != Insts.end() && "invalid sp adjusting instruction");
248  Info->SPAdjust += ExtraBytes;
249  }
250 
251  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
252  const ARMBaseInstrInfo &TII, bool HasFP) {
253  MachineFunction &MF = *MBB.getParent();
254  unsigned CFAOffset = 0;
255  for (auto &Info : Insts) {
256  if (HasFP && !Info.BeforeFPSet)
257  return;
258 
259  CFAOffset -= Info.SPAdjust;
260  unsigned CFIIndex = MF.addFrameInst(
261  MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
262  BuildMI(MBB, std::next(Info.I), dl,
263  TII.get(TargetOpcode::CFI_INSTRUCTION))
264  .addCFIIndex(CFIIndex)
265  .setMIFlags(MachineInstr::FrameSetup);
266  }
267  }
268 };
269 
270 } // end anonymous namespace
271 
272 /// Emit an instruction sequence that will align the address in
273 /// register Reg by zero-ing out the lower bits. For versions of the
274 /// architecture that support Neon, this must be done in a single
275 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
276 /// single instruction. That function only gets called when optimizing
277 /// spilling of D registers on a core with the Neon instruction set
278 /// present.
279 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
280  const TargetInstrInfo &TII,
281  MachineBasicBlock &MBB,
282  MachineBasicBlock::iterator MBBI,
283  const DebugLoc &DL, const unsigned Reg,
284  const unsigned Alignment,
285  const bool MustBeSingleInstruction) {
286  const ARMSubtarget &AST =
287  static_cast<const ARMSubtarget &>(MF.getSubtarget());
288  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
289  const unsigned AlignMask = Alignment - 1;
290  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
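  // For example, Alignment == 16 gives AlignMask == 0xf and NrBitsToZero == 4,
  // so the sequence emitted below is "bfc Reg, #0, #4" (or "bic Reg, Reg, #15").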
291  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
292  if (!AFI->isThumbFunction()) {
293  // if the BFC instruction is available, use that to zero the lower
294  // bits:
295  // bfc Reg, #0, log2(Alignment)
296  // otherwise use BIC, if the mask to zero the required number of bits
297  // can be encoded in the bic immediate field
298  // bic Reg, Reg, Alignment-1
299  // otherwise, emit
300  // lsr Reg, Reg, log2(Alignment)
301  // lsl Reg, Reg, log2(Alignment)
302  if (CanUseBFC) {
303  BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
304  .addReg(Reg, RegState::Kill)
305  .addImm(~AlignMask)
306  .add(predOps(ARMCC::AL));
307  } else if (AlignMask <= 255) {
308  BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
309  .addReg(Reg, RegState::Kill)
310  .addImm(AlignMask)
311  .add(predOps(ARMCC::AL))
312  .add(condCodeOp());
313  } else {
314  assert(!MustBeSingleInstruction &&
315  "Shouldn't call emitAligningInstructions demanding a single "
316  "instruction to be emitted for large stack alignment for a target "
317  "without BFC.");
318  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
319  .addReg(Reg, RegState::Kill)
320  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
321  .add(predOps(ARMCC::AL))
322  .add(condCodeOp());
323  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
324  .addReg(Reg, RegState::Kill)
325  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
326  .add(predOps(ARMCC::AL))
327  .add(condCodeOp());
328  }
329  } else {
330  // Since this is only reached for Thumb-2 targets, the BFC instruction
331  // should always be available.
332  assert(CanUseBFC);
333  BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
334  .addReg(Reg, RegState::Kill)
335  .addImm(~AlignMask)
336  .add(predOps(ARMCC::AL));
337  }
338 }
339 
340 /// We need the offset of the frame pointer relative to other MachineFrameInfo
341 /// offsets which are encoded relative to SP at function begin.
342 /// See also emitPrologue() for how the FP is set up.
343 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
344 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
345 /// this to produce a conservative estimate that we check in an assert() later.
346 static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
347  // This is a conservative estimate: assume the frame pointer is r7 and that
348  // registers from pc ("r15") down to r8 (= 8 registers) get spilled before it.
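  // For example, with a 16-byte vararg register save area this evaluates to
  // -16 - 32 = -48 bytes relative to the incoming SP.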
349  return -AFI.getArgRegsSaveSize() - (8 * 4);
350 }
351 
352 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
353  MachineBasicBlock &MBB) const {
354  MachineBasicBlock::iterator MBBI = MBB.begin();
355  MachineFrameInfo &MFI = MF.getFrameInfo();
356  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
357  MachineModuleInfo &MMI = MF.getMMI();
358  MCContext &Context = MMI.getContext();
359  const TargetMachine &TM = MF.getTarget();
360  const MCRegisterInfo *MRI = Context.getRegisterInfo();
361  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
362  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
363  assert(!AFI->isThumb1OnlyFunction() &&
364  "This emitPrologue does not support Thumb1!");
365  bool isARM = !AFI->isThumbFunction();
366  unsigned Align = STI.getFrameLowering()->getStackAlignment();
367  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
368  unsigned NumBytes = MFI.getStackSize();
369  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
370 
371  // Debug location must be unknown since the first debug location is used
372  // to determine the end of the prologue.
373  DebugLoc dl;
374 
375  unsigned FramePtr = RegInfo->getFrameRegister(MF);
376 
377  // Determine the size of each callee-save spill area and record which frame
378  // index belongs to which area.
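  // Area 1 holds r0-r7 and lr (plus r8-r12 when push/pop is not split), area 2
  // holds r8-r12 when push/pop is split (e.g. iOS), and the DPR area holds the
  // callee-saved d-registers that are not part of the aligned DPRCS2 block.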
379  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
380  int FramePtrSpillFI = 0;
381  int D8SpillFI = 0;
382 
383  // All calls are tail calls in GHC calling conv, and functions have no
384  // prologue/epilogue.
385  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
386  return;
387 
388  StackAdjustingInsts DefCFAOffsetCandidates;
389  bool HasFP = hasFP(MF);
390 
391  // Allocate the vararg register save area.
392  if (ArgRegsSaveSize) {
393  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
394  MachineInstr::FrameSetup);
395  DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
396  }
397 
398  if (!AFI->hasStackFrame() &&
399  (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
400  if (NumBytes - ArgRegsSaveSize != 0) {
401  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
402  MachineInstr::FrameSetup);
403  DefCFAOffsetCandidates.addInst(std::prev(MBBI),
404  NumBytes - ArgRegsSaveSize, true);
405  }
406  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
407  return;
408  }
409 
410  // Determine spill area sizes.
411  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
412  unsigned Reg = CSI[i].getReg();
413  int FI = CSI[i].getFrameIdx();
414  switch (Reg) {
415  case ARM::R8:
416  case ARM::R9:
417  case ARM::R10:
418  case ARM::R11:
419  case ARM::R12:
420  if (STI.splitFramePushPop(MF)) {
421  GPRCS2Size += 4;
422  break;
423  }
424  LLVM_FALLTHROUGH;
425  case ARM::R0:
426  case ARM::R1:
427  case ARM::R2:
428  case ARM::R3:
429  case ARM::R4:
430  case ARM::R5:
431  case ARM::R6:
432  case ARM::R7:
433  case ARM::LR:
434  if (Reg == FramePtr)
435  FramePtrSpillFI = FI;
436  GPRCS1Size += 4;
437  break;
438  default:
439  // This is a DPR. Exclude the aligned DPRCS2 spills.
440  if (Reg == ARM::D8)
441  D8SpillFI = FI;
442  if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
443  DPRCSSize += 8;
444  }
445  }
446 
447  // Move past area 1.
448  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
449  if (GPRCS1Size > 0) {
450  GPRCS1Push = LastPush = MBBI++;
451  DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
452  }
453 
454  // Determine starting offsets of spill areas.
455  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
456  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
457  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
458  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
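  // DPRGapSize is the padding (0 or 4 bytes) needed so the DPR spill area stays
  // 8-byte aligned after the GPR pushes and the vararg register save area.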
459  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
460  int FramePtrOffsetInPush = 0;
461  if (HasFP) {
462  int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
463  assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset &&
464  "Max FP estimation is wrong");
465  FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
466  AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
467  NumBytes);
468  }
469  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
470  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
471  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
472 
473  // Move past area 2.
474  if (GPRCS2Size > 0) {
475  GPRCS2Push = LastPush = MBBI++;
476  DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
477  }
478 
479  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
480  // .cfi_offset operations will reflect that.
481  if (DPRGapSize) {
482  assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
483  if (LastPush != MBB.end() &&
484  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
485  DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
486  else {
487  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
488  MachineInstr::FrameSetup);
489  DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
490  }
491  }
492 
493  // Move past area 3.
494  if (DPRCSSize > 0) {
495  // Since vpush register list cannot have gaps, there may be multiple vpush
496  // instructions in the prologue.
497  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
498  DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
499  LastPush = MBBI++;
500  }
501  }
502 
503  // Move past the aligned DPRCS2 area.
504  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
505  MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
506  // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
507  // leaves the stack pointer pointing to the DPRCS2 area.
508  //
509  // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
510  NumBytes += MFI.getObjectOffset(D8SpillFI);
511  } else
512  NumBytes = DPRCSOffset;
513 
514  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
515  uint32_t NumWords = NumBytes >> 2;
516 
517  if (NumWords < 65536)
518  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
519  .addImm(NumWords)
520  .setMIFlags(MachineInstr::FrameSetup)
521  .add(predOps(ARMCC::AL));
522  else
523  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
524  .addImm(NumWords)
525  .setMIFlags(MachineInstr::FrameSetup);
526 
527  switch (TM.getCodeModel()) {
528  case CodeModel::Tiny:
529  llvm_unreachable("Tiny code model not available on ARM.");
530  case CodeModel::Small:
531  case CodeModel::Medium:
532  case CodeModel::Kernel:
533  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
534  .add(predOps(ARMCC::AL))
535  .addExternalSymbol("__chkstk")
536  .addReg(ARM::R4, RegState::Implicit)
537  .setMIFlags(MachineInstr::FrameSetup);
538  break;
539  case CodeModel::Large:
540  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
541  .addExternalSymbol("__chkstk")
542  .setMIFlags(MachineInstr::FrameSetup);
543 
544  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
545  .add(predOps(ARMCC::AL))
546  .addReg(ARM::R12, RegState::Kill)
547  .addReg(ARM::R4, RegState::Implicit)
548  .setMIFlags(MachineInstr::FrameSetup);
549  break;
550  }
551 
552  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
553  .addReg(ARM::SP, RegState::Kill)
554  .addReg(ARM::R4, RegState::Kill)
555  .setMIFlags(MachineInstr::FrameSetup)
556  .add(predOps(ARMCC::AL))
557  .add(condCodeOp());
558  NumBytes = 0;
559  }
560 
561  if (NumBytes) {
562  // Adjust SP after all the callee-save spills.
563  if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
564  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
565  DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
566  else {
567  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
568  MachineInstr::FrameSetup);
569  DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
570  }
571 
572  if (HasFP && isARM)
573  // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
574  // Note it's not safe to do this in Thumb2 mode because it would have
575  // taken two instructions:
576  // mov sp, r7
577  // sub sp, #24
578  // If an interrupt is taken between the two instructions, then sp is in
579  // an inconsistent state (pointing to the middle of callee-saved area).
580  // The interrupt handler can end up clobbering the registers.
581  AFI->setShouldRestoreSPFromFP(true);
582  }
583 
584  // Set FP to point to the stack slot that contains the previous FP.
585  // For iOS, FP is R7, which has now been stored in spill area 1.
586  // Otherwise, if this is not iOS, all the callee-saved registers go
587  // into spill area 1, including the FP in R11. In either case, it
588  // is in area one and the adjustment needs to take place just after
589  // that push.
590  if (HasFP) {
591  MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
592  unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
593  emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
594  dl, TII, FramePtr, ARM::SP,
595  PushSize + FramePtrOffsetInPush,
596  MachineInstr::FrameSetup);
597  if (FramePtrOffsetInPush + PushSize != 0) {
598  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
599  nullptr, MRI->getDwarfRegNum(FramePtr, true),
600  -(ArgRegsSaveSize - FramePtrOffsetInPush)));
601  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
602  .addCFIIndex(CFIIndex)
603  .setMIFlags(MachineInstr::FrameSetup);
604  } else {
605  unsigned CFIIndex =
606  MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
607  nullptr, MRI->getDwarfRegNum(FramePtr, true)));
608  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
609  .addCFIIndex(CFIIndex)
610  .setMIFlags(MachineInstr::FrameSetup);
611  }
612  }
613 
614  // Now that the prologue's actual instructions are finalised, we can insert
615  // the necessary DWARF cf instructions to describe the situation. Start by
616  // recording where each register ended up:
617  if (GPRCS1Size > 0) {
618  MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
619  int CFIIndex;
620  for (const auto &Entry : CSI) {
621  unsigned Reg = Entry.getReg();
622  int FI = Entry.getFrameIdx();
623  switch (Reg) {
624  case ARM::R8:
625  case ARM::R9:
626  case ARM::R10:
627  case ARM::R11:
628  case ARM::R12:
629  if (STI.splitFramePushPop(MF))
630  break;
631  LLVM_FALLTHROUGH;
632  case ARM::R0:
633  case ARM::R1:
634  case ARM::R2:
635  case ARM::R3:
636  case ARM::R4:
637  case ARM::R5:
638  case ARM::R6:
639  case ARM::R7:
640  case ARM::LR:
641  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
642  nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
643  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
644  .addCFIIndex(CFIIndex)
645  .setMIFlags(MachineInstr::FrameSetup);
646  break;
647  }
648  }
649  }
650 
651  if (GPRCS2Size > 0) {
652  MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
653  for (const auto &Entry : CSI) {
654  unsigned Reg = Entry.getReg();
655  int FI = Entry.getFrameIdx();
656  switch (Reg) {
657  case ARM::R8:
658  case ARM::R9:
659  case ARM::R10:
660  case ARM::R11:
661  case ARM::R12:
662  if (STI.splitFramePushPop(MF)) {
663  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
664  unsigned Offset = MFI.getObjectOffset(FI);
665  unsigned CFIIndex = MF.addFrameInst(
666  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
667  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
668  .addCFIIndex(CFIIndex)
669  .setMIFlags(MachineInstr::FrameSetup);
670  }
671  break;
672  }
673  }
674  }
675 
676  if (DPRCSSize > 0) {
677  // Since vpush register list cannot have gaps, there may be multiple vpush
678  // instructions in the prologue.
679  MachineBasicBlock::iterator Pos = std::next(LastPush);
680  for (const auto &Entry : CSI) {
681  unsigned Reg = Entry.getReg();
682  int FI = Entry.getFrameIdx();
683  if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
684  (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
685  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
686  unsigned Offset = MFI.getObjectOffset(FI);
687  unsigned CFIIndex = MF.addFrameInst(
688  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
689  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
690  .addCFIIndex(CFIIndex)
691  .setMIFlags(MachineInstr::FrameSetup);
692  }
693  }
694  }
695 
696  // Now we can emit descriptions of where the canonical frame address was
697  // throughout the process. If we have a frame pointer, it takes over the job
698  // half-way through, so only the first few .cfi_def_cfa_offset instructions
699  // actually get emitted.
700  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
701 
702  if (STI.isTargetELF() && hasFP(MF))
703  MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
704  AFI->getFramePtrSpillOffset());
705 
706  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
707  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
708  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
709  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
710 
711  // If we need dynamic stack realignment, do it here. Be paranoid and make
712  // sure if we also have VLAs, we have a base pointer for frame access.
713  // If aligned NEON registers were spilled, the stack has already been
714  // realigned.
715  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
716  unsigned MaxAlign = MFI.getMaxAlignment();
717  assert(!AFI->isThumb1OnlyFunction());
718  if (!AFI->isThumbFunction()) {
719  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
720  false);
721  } else {
722  // We cannot use sp as source/dest register here, thus we're using r4 to
723  // perform the calculations. We're emitting the following sequence:
724  // mov r4, sp
725  // -- use emitAligningInstructions to produce best sequence to zero
726  // -- out lower bits in r4
727  // mov sp, r4
728  // FIXME: It will be better just to find spare register here.
729  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
730  .addReg(ARM::SP, RegState::Kill)
731  .add(predOps(ARMCC::AL));
732  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
733  false);
734  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
735  .addReg(ARM::R4, RegState::Kill)
736  .add(predOps(ARMCC::AL));
737  }
738 
739  AFI->setShouldRestoreSPFromFP(true);
740  }
741 
742  // If we need a base pointer, set it up here. It's whatever the value
743  // of the stack pointer is at this point. Any variable size objects
744  // will be allocated after this, so we can still use the base pointer
745  // to reference locals.
746  // FIXME: Clarify FrameSetup flags here.
747  if (RegInfo->hasBasePointer(MF)) {
748  if (isARM)
749  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
750  .addReg(ARM::SP)
751  .add(predOps(ARMCC::AL))
752  .add(condCodeOp());
753  else
754  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
755  .addReg(ARM::SP)
756  .add(predOps(ARMCC::AL));
757  }
758 
759  // If the frame has variable sized objects then the epilogue must restore
760  // the sp from fp. We can assume there's an FP here since hasFP already
761  // checks for hasVarSizedObjects.
762  if (MFI.hasVarSizedObjects())
763  AFI->setShouldRestoreSPFromFP(true);
764 }
765 
766 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
767  MachineBasicBlock &MBB) const {
768  MachineFrameInfo &MFI = MF.getFrameInfo();
769  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
770  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
771  const ARMBaseInstrInfo &TII =
772  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
773  assert(!AFI->isThumb1OnlyFunction() &&
774  "This emitEpilogue does not support Thumb1!");
775  bool isARM = !AFI->isThumbFunction();
776 
777  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
778  int NumBytes = (int)MFI.getStackSize();
779  unsigned FramePtr = RegInfo->getFrameRegister(MF);
780 
781  // All calls are tail calls in GHC calling conv, and functions have no
782  // prologue/epilogue.
783  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
784  return;
785 
786  // First put ourselves on the first (from top) terminator instruction.
787  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
788  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
789 
790  if (!AFI->hasStackFrame()) {
791  if (NumBytes - ArgRegsSaveSize != 0)
792  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
793  } else {
794  // Unwind MBBI to point to first LDR / VLDRD.
795  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
796  if (MBBI != MBB.begin()) {
797  do {
798  --MBBI;
799  } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
800  if (!isCSRestore(*MBBI, TII, CSRegs))
801  ++MBBI;
802  }
803 
804  // Move SP to start of FP callee save spill area.
805  NumBytes -= (ArgRegsSaveSize +
806  AFI->getGPRCalleeSavedArea1Size() +
807  AFI->getGPRCalleeSavedArea2Size() +
808  AFI->getDPRCalleeSavedGapSize() +
809  AFI->getDPRCalleeSavedAreaSize());
810 
811  // Reset SP based on frame pointer only if the stack frame extends beyond
812  // frame pointer stack slot or target is ELF and the function has FP.
813  if (AFI->shouldRestoreSPFromFP()) {
814  NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
815  if (NumBytes) {
816  if (isARM)
817  emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
818  ARMCC::AL, 0, TII);
819  else {
820  // It's not possible to restore SP from FP in a single instruction.
821  // For iOS, this looks like:
822  // mov sp, r7
823  // sub sp, #24
824  // This is bad, if an interrupt is taken after the mov, sp is in an
825  // inconsistent state.
826  // Use the first callee-saved register as a scratch register.
827  assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
828  "No scratch register to restore SP from FP!");
829  emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
830  ARMCC::AL, 0, TII);
831  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
832  .addReg(ARM::R4)
833  .add(predOps(ARMCC::AL));
834  }
835  } else {
836  // Thumb2 or ARM.
837  if (isARM)
838  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
839  .addReg(FramePtr)
840  .add(predOps(ARMCC::AL))
841  .add(condCodeOp());
842  else
843  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
844  .addReg(FramePtr)
845  .add(predOps(ARMCC::AL));
846  }
847  } else if (NumBytes &&
848  !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
849  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
850 
851  // Increment past our save areas.
852  if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
853  MBBI++;
854  // Since vpop register list cannot have gaps, there may be multiple vpop
855  // instructions in the epilogue.
856  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
857  MBBI++;
858  }
859  if (AFI->getDPRCalleeSavedGapSize()) {
860  assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
861  "unexpected DPR alignment gap");
862  emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
863  }
864 
865  if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
866  if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
867  }
868 
869  if (ArgRegsSaveSize)
870  emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
871 }
872 
873 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
874 /// debug info. It's the same as what we use for resolving the code-gen
875 /// references for now. FIXME: This can go wrong when references are
876 /// SP-relative and simple call frames aren't used.
877 int
878 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
879  unsigned &FrameReg) const {
880  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
881 }
882 
883 int
884 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
885  int FI, unsigned &FrameReg,
886  int SPAdj) const {
887  const MachineFrameInfo &MFI = MF.getFrameInfo();
888  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
889  MF.getSubtarget().getRegisterInfo());
890  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
891  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
892  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
893  bool isFixed = MFI.isFixedObjectIndex(FI);
894 
895  FrameReg = ARM::SP;
896  Offset += SPAdj;
897 
898  // SP can move around if there are allocas. We may also lose track of SP
899  // when emergency spilling inside a non-reserved call frame setup.
900  bool hasMovingSP = !hasReservedCallFrame(MF);
901 
902  // When dynamically realigning the stack, use the frame pointer for
903  // parameters, and the stack/base pointer for locals.
904  if (RegInfo->needsStackRealignment(MF)) {
905  assert(hasFP(MF) && "dynamic stack realignment without a FP!");
906  if (isFixed) {
907  FrameReg = RegInfo->getFrameRegister(MF);
908  Offset = FPOffset;
909  } else if (hasMovingSP) {
910  assert(RegInfo->hasBasePointer(MF) &&
911  "VLAs and dynamic stack alignment, but missing base pointer!");
912  FrameReg = RegInfo->getBaseRegister();
913  Offset -= SPAdj;
914  }
915  return Offset;
916  }
917 
918  // If there is a frame pointer, use it when we can.
919  if (hasFP(MF) && AFI->hasStackFrame()) {
920  // Use frame pointer to reference fixed objects. Use it for locals if
921  // there are VLAs (and thus the SP isn't reliable as a base).
922  if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
923  FrameReg = RegInfo->getFrameRegister(MF);
924  return FPOffset;
925  } else if (hasMovingSP) {
926  assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
927  if (AFI->isThumb2Function()) {
928  // Try to use the frame pointer if we can, else use the base pointer
929  // since it's available. This is handy for the emergency spill slot, in
930  // particular.
931  if (FPOffset >= -255 && FPOffset < 0) {
932  FrameReg = RegInfo->getFrameRegister(MF);
933  return FPOffset;
934  }
935  }
936  } else if (AFI->isThumbFunction()) {
937  // Prefer SP to base pointer, if the offset is suitably aligned and in
938  // range as the effective range of the immediate offset is bigger when
939  // basing off SP.
940  // Use add <rd>, sp, #<imm8>
941  // ldr <rd>, [sp, #<imm8>]
942  if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
943  return Offset;
944  // In Thumb2 mode, the negative offset is very limited. Try to avoid
945  // out of range references. ldr <rt>,[<rn>, #-<imm8>]
946  if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
947  FrameReg = RegInfo->getFrameRegister(MF);
948  return FPOffset;
949  }
950  } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
951  // Otherwise, use SP or FP, whichever is closer to the stack slot.
952  FrameReg = RegInfo->getFrameRegister(MF);
953  return FPOffset;
954  }
955  }
956  // Use the base pointer if we have one.
957  if (RegInfo->hasBasePointer(MF))
958  FrameReg = RegInfo->getBaseRegister();
959  return Offset;
960 }
961 
962 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
963  MachineBasicBlock::iterator MI,
964  const std::vector<CalleeSavedInfo> &CSI,
965  unsigned StmOpc, unsigned StrOpc,
966  bool NoGap,
967  bool(*Func)(unsigned, bool),
968  unsigned NumAlignedDPRCS2Regs,
969  unsigned MIFlags) const {
970  MachineFunction &MF = *MBB.getParent();
971  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
972  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
973 
974  DebugLoc DL;
975 
976  using RegAndKill = std::pair<unsigned, bool>;
977 
978  SmallVector<RegAndKill, 4> Regs;
979  unsigned i = CSI.size();
980  while (i != 0) {
981  unsigned LastReg = 0;
982  for (; i != 0; --i) {
983  unsigned Reg = CSI[i-1].getReg();
984  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
985 
986  // D-registers in the aligned area DPRCS2 are NOT spilled here.
987  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
988  continue;
989 
990  const MachineRegisterInfo &MRI = MF.getRegInfo();
991  bool isLiveIn = MRI.isLiveIn(Reg);
992  if (!isLiveIn && !MRI.isReserved(Reg))
993  MBB.addLiveIn(Reg);
994  // If NoGap is true, push consecutive registers and then leave the rest
995  // for other instructions. e.g.
996  // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
997  if (NoGap && LastReg && LastReg != Reg-1)
998  break;
999  LastReg = Reg;
1000  // Do not set a kill flag on values that are also marked as live-in. This
1001  // happens with the @llvm.returnaddress intrinsic and with arguments
1002  // passed in callee saved registers.
1003  // Omitting the kill flags is conservatively correct even if the live-in
1004  // is not used after all.
1005  Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1006  }
1007 
1008  if (Regs.empty())
1009  continue;
1010 
1011  llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1012  return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1013  });
1014 
1015  if (Regs.size() > 1 || StrOpc== 0) {
1016  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1017  .addReg(ARM::SP)
1018  .setMIFlags(MIFlags)
1019  .add(predOps(ARMCC::AL));
1020  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1021  MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1022  } else if (Regs.size() == 1) {
1023  BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1024  .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1025  .addReg(ARM::SP)
1026  .setMIFlags(MIFlags)
1027  .addImm(-4)
1028  .add(predOps(ARMCC::AL));
1029  }
1030  Regs.clear();
1031 
1032  // Put any subsequent vpush instructions before this one: they will refer to
1033  // higher register numbers so need to be pushed first in order to preserve
1034  // monotonicity.
1035  if (MI != MBB.begin())
1036  --MI;
1037  }
1038 }
1039 
1040 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1041  MachineBasicBlock::iterator MI,
1042  std::vector<CalleeSavedInfo> &CSI,
1043  unsigned LdmOpc, unsigned LdrOpc,
1044  bool isVarArg, bool NoGap,
1045  bool(*Func)(unsigned, bool),
1046  unsigned NumAlignedDPRCS2Regs) const {
1047  MachineFunction &MF = *MBB.getParent();
1048  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1049  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1050  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1051  DebugLoc DL;
1052  bool isTailCall = false;
1053  bool isInterrupt = false;
1054  bool isTrap = false;
1055  if (MBB.end() != MI) {
1056  DL = MI->getDebugLoc();
1057  unsigned RetOpcode = MI->getOpcode();
1058  isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1059  isInterrupt =
1060  RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1061  isTrap =
1062  RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1063  RetOpcode == ARM::tTRAP;
1064  }
1065 
1066  SmallVector<unsigned, 4> Regs;
1067  unsigned i = CSI.size();
1068  while (i != 0) {
1069  unsigned LastReg = 0;
1070  bool DeleteRet = false;
1071  for (; i != 0; --i) {
1072  CalleeSavedInfo &Info = CSI[i-1];
1073  unsigned Reg = Info.getReg();
1074  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1075 
1076  // The aligned reloads from area DPRCS2 are not inserted here.
1077  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1078  continue;
1079 
1080  if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1081  !isTrap && STI.hasV5TOps()) {
1082  if (MBB.succ_empty()) {
1083  Reg = ARM::PC;
1084  // Fold the return instruction into the LDM.
1085  DeleteRet = true;
1086  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1087  // We 'restore' LR into PC so it is not live out of the return block:
1088  // Clear Restored bit.
1089  Info.setRestored(false);
1090  } else
1091  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1092  }
1093 
1094  // If NoGap is true, pop consecutive registers and then leave the rest
1095  // for other instructions. e.g.
1096  // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1097  if (NoGap && LastReg && LastReg != Reg-1)
1098  break;
1099 
1100  LastReg = Reg;
1101  Regs.push_back(Reg);
1102  }
1103 
1104  if (Regs.empty())
1105  continue;
1106 
1107  llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1108  return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1109  });
1110 
1111  if (Regs.size() > 1 || LdrOpc == 0) {
1112  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1113  .addReg(ARM::SP)
1114  .add(predOps(ARMCC::AL));
1115  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1116  MIB.addReg(Regs[i], getDefRegState(true));
1117  if (DeleteRet) {
1118  if (MI != MBB.end()) {
1119  MIB.copyImplicitOps(*MI);
1120  MI->eraseFromParent();
1121  }
1122  }
1123  MI = MIB;
1124  } else if (Regs.size() == 1) {
1125  // If we adjusted the reg to PC from LR above, switch it back here. We
1126  // only do that for LDM.
1127  if (Regs[0] == ARM::PC)
1128  Regs[0] = ARM::LR;
1129  MachineInstrBuilder MIB =
1130  BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1131  .addReg(ARM::SP, RegState::Define)
1132  .addReg(ARM::SP);
1133  // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1134  // that refactoring is complete (eventually).
1135  if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1136  MIB.addReg(0);
1137  MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1138  } else
1139  MIB.addImm(4);
1140  MIB.add(predOps(ARMCC::AL));
1141  }
1142  Regs.clear();
1143 
1144  // Put any subsequent vpop instructions after this one: they will refer to
1145  // higher register numbers so need to be popped afterwards.
1146  if (MI != MBB.end())
1147  ++MI;
1148  }
1149 }
1150 
1151 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1152 /// starting from d8. Also insert stack realignment code and leave the stack
1153 /// pointer pointing to the d8 spill slot.
1154 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1155  MachineBasicBlock::iterator MI,
1156  unsigned NumAlignedDPRCS2Regs,
1157  const std::vector<CalleeSavedInfo> &CSI,
1158  const TargetRegisterInfo *TRI) {
1159  MachineFunction &MF = *MBB.getParent();
1160  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1161  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1162  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1163  MachineFrameInfo &MFI = MF.getFrameInfo();
1164 
1165  // Mark the D-register spill slots as properly aligned. Since MFI computes
1166  // stack slot layout backwards, this can actually mean that the d-reg stack
1167  // slot offsets can be wrong. The offset for d8 will always be correct.
1168  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1169  unsigned DNum = CSI[i].getReg() - ARM::D8;
1170  if (DNum > NumAlignedDPRCS2Regs - 1)
1171  continue;
1172  int FI = CSI[i].getFrameIdx();
1173  // The even-numbered registers will be 16-byte aligned, the odd-numbered
1174  // registers will be 8-byte aligned.
1175  MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
1176 
1177  // The stack slot for D8 needs to be maximally aligned because this is
1178  // actually the point where we align the stack pointer. MachineFrameInfo
1179  // computes all offsets relative to the incoming stack pointer which is a
1180  // bit weird when realigning the stack. Any extra padding for this
1181  // over-alignment is not realized because the code inserted below adjusts
1182  // the stack pointer by numregs * 8 before aligning the stack pointer.
1183  if (DNum == 0)
1184  MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
1185  }
1186 
1187  // Move the stack pointer to the d8 spill slot, and align it at the same
1188  // time. Leave the stack slot address in the scratch register r4.
1189  //
1190  // sub r4, sp, #numregs * 8
1191  // bic r4, r4, #align - 1
1192  // mov sp, r4
1193  //
1194  bool isThumb = AFI->isThumbFunction();
1195  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1196  AFI->setShouldRestoreSPFromFP(true);
1197 
1198  // sub r4, sp, #numregs * 8
1199  // The immediate is <= 64, so it doesn't need any special encoding.
1200  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1201  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1202  .addReg(ARM::SP)
1203  .addImm(8 * NumAlignedDPRCS2Regs)
1204  .add(predOps(ARMCC::AL))
1205  .add(condCodeOp());
1206 
1207  unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
1208  // We must set parameter MustBeSingleInstruction to true, since
1209  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1210  // stack alignment. Luckily, this can always be done since all ARM
1211  // architecture versions that support Neon also support the BFC
1212  // instruction.
1213  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1214 
1215  // mov sp, r4
1216  // The stack pointer must be adjusted before spilling anything, otherwise
1217  // the stack slots could be clobbered by an interrupt handler.
1218  // Leave r4 live, it is used below.
1219  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1220  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1221  .addReg(ARM::R4)
1222  .add(predOps(ARMCC::AL));
1223  if (!isThumb)
1224  MIB.add(condCodeOp());
1225 
1226  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1227  // r4 holds the stack slot address.
1228  unsigned NextReg = ARM::D8;
1229 
1230  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1231  // The writeback is only needed when emitting two vst1.64 instructions.
1232  if (NumAlignedDPRCS2Regs >= 6) {
1233  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1234  &ARM::QQPRRegClass);
1235  MBB.addLiveIn(SupReg);
1236  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1237  .addReg(ARM::R4, RegState::Kill)
1238  .addImm(16)
1239  .addReg(NextReg)
1240  .addReg(SupReg, RegState::ImplicitKill)
1241  .add(predOps(ARMCC::AL));
1242  NextReg += 4;
1243  NumAlignedDPRCS2Regs -= 4;
1244  }
1245 
1246  // We won't modify r4 beyond this point. It currently points to the next
1247  // register to be spilled.
1248  unsigned R4BaseReg = NextReg;
1249 
1250  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1251  if (NumAlignedDPRCS2Regs >= 4) {
1252  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1253  &ARM::QQPRRegClass);
1254  MBB.addLiveIn(SupReg);
1255  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1256  .addReg(ARM::R4)
1257  .addImm(16)
1258  .addReg(NextReg)
1259  .addReg(SupReg, RegState::ImplicitKill)
1260  .add(predOps(ARMCC::AL));
1261  NextReg += 4;
1262  NumAlignedDPRCS2Regs -= 4;
1263  }
1264 
1265  // 16-byte aligned vst1.64 with 2 d-regs.
1266  if (NumAlignedDPRCS2Regs >= 2) {
1267  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1268  &ARM::QPRRegClass);
1269  MBB.addLiveIn(SupReg);
1270  BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1271  .addReg(ARM::R4)
1272  .addImm(16)
1273  .addReg(SupReg)
1274  .add(predOps(ARMCC::AL));
1275  NextReg += 2;
1276  NumAlignedDPRCS2Regs -= 2;
1277  }
1278 
1279  // Finally, use a vanilla vstr.64 for the odd last register.
1280  if (NumAlignedDPRCS2Regs) {
1281  MBB.addLiveIn(NextReg);
1282  // vstr.64 uses addrmode5 which has an offset scale of 4.
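  // (NextReg - R4BaseReg) is the number of d-registers already stored above
  // r4; each is 8 bytes, i.e. 2 addrmode5 units of 4 bytes.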
1283  BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1284  .addReg(NextReg)
1285  .addReg(ARM::R4)
1286  .addImm((NextReg - R4BaseReg) * 2)
1287  .add(predOps(ARMCC::AL));
1288  }
1289 
1290  // The last spill instruction inserted should kill the scratch register r4.
1291  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1292 }
1293 
1294 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1295 /// iterator to the following instruction.
1296 static MachineBasicBlock::iterator
1297 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1298  unsigned NumAlignedDPRCS2Regs) {
1299  // sub r4, sp, #numregs * 8
1300  // bic r4, r4, #align - 1
1301  // mov sp, r4
1302  ++MI; ++MI; ++MI;
1303  assert(MI->mayStore() && "Expecting spill instruction");
1304 
1305  // These switches all fall through.
1306  switch(NumAlignedDPRCS2Regs) {
1307  case 7:
1308  ++MI;
1309  assert(MI->mayStore() && "Expecting spill instruction");
1310  LLVM_FALLTHROUGH;
1311  default:
1312  ++MI;
1313  assert(MI->mayStore() && "Expecting spill instruction");
1314  LLVM_FALLTHROUGH;
1315  case 1:
1316  case 2:
1317  case 4:
1318  assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1319  ++MI;
1320  }
1321  return MI;
1322 }
1323 
1324 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1325 /// starting from d8. These instructions are assumed to execute while the
1326 /// stack is still aligned, unlike the code inserted by emitPopInst.
1327 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1328  MachineBasicBlock::iterator MI,
1329  unsigned NumAlignedDPRCS2Regs,
1330  const std::vector<CalleeSavedInfo> &CSI,
1331  const TargetRegisterInfo *TRI) {
1332  MachineFunction &MF = *MBB.getParent();
1333  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1334  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1335  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1336 
1337  // Find the frame index assigned to d8.
1338  int D8SpillFI = 0;
1339  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1340  if (CSI[i].getReg() == ARM::D8) {
1341  D8SpillFI = CSI[i].getFrameIdx();
1342  break;
1343  }
1344 
1345  // Materialize the address of the d8 spill slot into the scratch register r4.
1346  // This can be fairly complicated if the stack frame is large, so just use
1347  // the normal frame index elimination mechanism to do it. This code runs as
1348  // the initial part of the epilog where the stack and base pointers haven't
1349  // been changed yet.
1350  bool isThumb = AFI->isThumbFunction();
1351  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1352 
1353  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1354  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1355  .addFrameIndex(D8SpillFI)
1356  .addImm(0)
1357  .add(predOps(ARMCC::AL))
1358  .add(condCodeOp());
1359 
1360  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1361  unsigned NextReg = ARM::D8;
1362 
1363  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1364  if (NumAlignedDPRCS2Regs >= 6) {
1365  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1366  &ARM::QQPRRegClass);
1367  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1368  .addReg(ARM::R4, RegState::Define)
1369  .addReg(ARM::R4, RegState::Kill)
1370  .addImm(16)
1371  .addReg(SupReg, RegState::ImplicitDefine)
1372  .add(predOps(ARMCC::AL));
1373  NextReg += 4;
1374  NumAlignedDPRCS2Regs -= 4;
1375  }
1376 
1377  // We won't modify r4 beyond this point. It currently points to the next
1378  // register to be spilled.
1379  unsigned R4BaseReg = NextReg;
1380 
1381  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1382  if (NumAlignedDPRCS2Regs >= 4) {
1383  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1384  &ARM::QQPRRegClass);
1385  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1386  .addReg(ARM::R4)
1387  .addImm(16)
1388  .addReg(SupReg, RegState::ImplicitDefine)
1389  .add(predOps(ARMCC::AL));
1390  NextReg += 4;
1391  NumAlignedDPRCS2Regs -= 4;
1392  }
1393 
1394  // 16-byte aligned vld1.64 with 2 d-regs.
1395  if (NumAlignedDPRCS2Regs >= 2) {
1396  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1397  &ARM::QPRRegClass);
1398  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1399  .addReg(ARM::R4)
1400  .addImm(16)
1401  .add(predOps(ARMCC::AL));
1402  NextReg += 2;
1403  NumAlignedDPRCS2Regs -= 2;
1404  }
1405 
1406  // Finally, use a vanilla vldr.64 for the remaining odd register.
1407  if (NumAlignedDPRCS2Regs)
1408  BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1409  .addReg(ARM::R4)
1410  .addImm(2 * (NextReg - R4BaseReg))
1411  .add(predOps(ARMCC::AL));
1412 
1413  // The last reload inserted above kills the scratch register r4.
1414  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1415 }
1416 
1417 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
1418  MachineBasicBlock::iterator MI,
1419  const std::vector<CalleeSavedInfo> &CSI,
1420  const TargetRegisterInfo *TRI) const {
1421  if (CSI.empty())
1422  return false;
1423 
1424  MachineFunction &MF = *MBB.getParent();
1425  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1426 
1427  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1428  unsigned PushOneOpc = AFI->isThumbFunction() ?
1429  ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1430  unsigned FltOpc = ARM::VSTMDDB_UPD;
1431  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1432  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1433  MachineInstr::FrameSetup);
1434  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1435  MachineInstr::FrameSetup);
1436  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1437  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1438 
1439  // The code above does not insert spill code for the aligned DPRCS2 registers.
1440  // The stack realignment code will be inserted between the push instructions
1441  // and these spills.
1442  if (NumAlignedDPRCS2Regs)
1443  emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1444 
1445  return true;
1446 }
1447 
1448 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1449  MachineBasicBlock::iterator MI,
1450  std::vector<CalleeSavedInfo> &CSI,
1451  const TargetRegisterInfo *TRI) const {
1452  if (CSI.empty())
1453  return false;
1454 
1455  MachineFunction &MF = *MBB.getParent();
1456  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1457  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1458  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1459 
1460  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1461  // registers. Do that here instead.
1462  if (NumAlignedDPRCS2Regs)
1463  emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1464 
1465  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1466  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1467  unsigned FltOpc = ARM::VLDMDIA_UPD;
1468  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1469  NumAlignedDPRCS2Regs);
1470  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1471  &isARMArea2Register, 0);
1472  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1473  &isARMArea1Register, 0);
1474 
1475  return true;
1476 }
1477 
1478 // FIXME: Make generic?
1479 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
1480  const ARMBaseInstrInfo &TII) {
1481  unsigned FnSize = 0;
1482  for (auto &MBB : MF) {
1483  for (auto &MI : MBB)
1484  FnSize += TII.getInstSizeInBytes(MI);
1485  }
1486  if (MF.getJumpTableInfo())
1487  for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
1488  FnSize += Table.MBBs.size() * 4;
1489  FnSize += MF.getConstantPool()->getConstants().size() * 4;
1490  return FnSize;
1491 }
1492 
1493 /// estimateRSStackSizeLimit - Look at each instruction that references stack
1494 /// frames and return the stack size limit beyond which some of these
1495 /// instructions will require a scratch register during their expansion later.
1496 // FIXME: Move to TII?
1497 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
1498  const TargetFrameLowering *TFI) {
1499  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1500  unsigned Limit = (1 << 12) - 1;
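  // Start from the ARM addrmode2 limit of 4095 bytes and shrink it whenever a
  // more restrictive addressing mode is seen below.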
1501  for (auto &MBB : MF) {
1502  for (auto &MI : MBB) {
1503  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1504  if (!MI.getOperand(i).isFI())
1505  continue;
1506 
1507  // When using ADDri to get the address of a stack object, 255 is the
1508  // largest offset guaranteed to fit in the immediate offset.
1509  if (MI.getOpcode() == ARM::ADDri) {
1510  Limit = std::min(Limit, (1U << 8) - 1);
1511  break;
1512  }
1513 
1514  // Otherwise check the addressing mode.
1515  switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
1516  case ARMII::AddrMode3:
1517  case ARMII::AddrModeT2_i8:
1518  Limit = std::min(Limit, (1U << 8) - 1);
1519  break;
1520  case ARMII::AddrMode5:
1523  Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1524  break;
1525  case ARMII::AddrModeT2_i12:
1526  // i12 supports only positive offset so these will be converted to
1527  // i8 opcodes. See llvm::rewriteT2FrameIndex.
1528  if (TFI->hasFP(MF) && AFI->hasStackFrame())
1529  Limit = std::min(Limit, (1U << 8) - 1);
1530  break;
1531  case ARMII::AddrMode4:
1532  case ARMII::AddrMode6:
1533  // Addressing modes 4 & 6 (load/store) instructions can't encode an
1534  // immediate offset for stack references.
1535  return 0;
1536  default:
1537  break;
1538  }
1539  break; // At most one FI per instruction
1540  }
1541  }
1542  }
1543 
1544  return Limit;
1545 }
1546 
1547 // In functions that realign the stack, it can be an advantage to spill the
1548 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1549 // instructions take alignment hints that can improve performance.
1550 static void
1551 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
1552  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1553  if (!SpillAlignedNEONRegs)
1554  return;
1555 
1556  // Naked functions don't spill callee-saved registers.
1557  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
1558  return;
1559 
1560  // We are planning to use NEON instructions vst1 / vld1.
1561  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
1562  return;
1563 
1564  // Don't bother if the default stack alignment is sufficiently high.
1565  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
1566  return;
1567 
1568  // Aligned spills require stack realignment.
1569  if (!static_cast<const ARMBaseRegisterInfo *>(
1570  MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
1571  return;
1572 
1573  // We always spill contiguous d-registers starting from d8. Count how many
1574  // need spilling. The register allocator will almost always use the
1575  // callee-saved registers in order, but it can happen that there are holes in
1576  // the range. Registers above the hole will be spilled to the standard DPRCS
1577  // area.
1578  unsigned NumSpills = 0;
1579  for (; NumSpills < 8; ++NumSpills)
1580  if (!SavedRegs.test(ARM::D8 + NumSpills))
1581  break;
1582 
1583  // Don't do this for just one d-register. It's not worth it.
1584  if (NumSpills < 2)
1585  return;
1586 
1587  // Spill the first NumSpills D-registers after realigning the stack.
1588  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1589 
1590  // A scratch register is required for the vst1 / vld1 instructions.
1591  SavedRegs.set(ARM::R4);
1592 }
1593 
1594 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
1595  BitVector &SavedRegs,
1596  RegScavenger *RS) const {
1597  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1598  // This tells PEI to spill the FP as if it is any other callee-save register
1599  // to take advantage of the eliminateFrameIndex machinery. This also ensures it
1600  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1601  // to combine multiple loads / stores.
1602  bool CanEliminateFrame = true;
1603  bool CS1Spilled = false;
1604  bool LRSpilled = false;
1605  unsigned NumGPRSpills = 0;
1606  unsigned NumFPRSpills = 0;
1607  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1608  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1609  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1610  MF.getSubtarget().getRegisterInfo());
1611  const ARMBaseInstrInfo &TII =
1612  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1613  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1614  MachineFrameInfo &MFI = MF.getFrameInfo();
1615  MachineRegisterInfo &MRI = MF.getRegInfo();
1616  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1617  (void)TRI; // Silence unused warning in non-assert builds.
1618  unsigned FramePtr = RegInfo->getFrameRegister(MF);
1619 
1620  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
1621  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
1622  // since it's not always possible to restore sp from fp in a single
1623  // instruction.
1624  // FIXME: It would be better just to find a spare register here.
1625  if (AFI->isThumb2Function() &&
1626  (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1627  SavedRegs.set(ARM::R4);
1628 
1629  // If a stack probe will be emitted, spill R4 and LR, since they are
1630  // clobbered by the stack probe call.
1631  // This estimate should be a safe, conservative estimate. The actual
1632  // stack probe is enabled based on the size of the local objects;
1633  // this estimate also includes the varargs store size.
1634  if (STI.isTargetWindows() &&
1635  WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
1636  SavedRegs.set(ARM::R4);
1637  SavedRegs.set(ARM::LR);
1638  }
1639 
1640  if (AFI->isThumb1OnlyFunction()) {
1641  // Spill LR if Thumb1 function uses variable length argument lists.
1642  if (AFI->getArgRegsSaveSize() > 0)
1643  SavedRegs.set(ARM::LR);
1644 
1645  // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
1646  // requires stack alignment. We don't know for sure what the stack size
1647  // will be, but for this an estimate is good enough. If anything changes the
1648  // estimate, it will be because of a spill, which implies we've used all the
1649  // registers and so R4 is already used, so not marking it here will be OK.
1650  // FIXME: It would be better just to find a spare register here.
1651  if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
1652  MFI.estimateStackSize(MF) > 508)
1653  SavedRegs.set(ARM::R4);
1654  }
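// Illustrative note (not part of the original source): 508 is the largest
// SP adjustment a single Thumb1 "add/sub sp, #imm" can encode (a 7-bit
// immediate scaled by 4, i.e. 127 * 4 = 508), so any larger estimated frame
// may force the epilogue to rebuild SP through a scratch register such as R4.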
1655 
1656  // See if we can spill vector registers to aligned stack.
1657  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
1658 
1659  // Spill the BasePtr if it's used.
1660  if (RegInfo->hasBasePointer(MF))
1661  SavedRegs.set(RegInfo->getBaseRegister());
1662 
1663  // Don't spill FP if the frame can be eliminated. This is determined
1664  // by scanning the callee-save registers to see if any is modified.
1665  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1666  for (unsigned i = 0; CSRegs[i]; ++i) {
1667  unsigned Reg = CSRegs[i];
1668  bool Spilled = false;
1669  if (SavedRegs.test(Reg)) {
1670  Spilled = true;
1671  CanEliminateFrame = false;
1672  }
1673 
1674  if (!ARM::GPRRegClass.contains(Reg)) {
1675  if (Spilled) {
1676  if (ARM::SPRRegClass.contains(Reg))
1677  NumFPRSpills++;
1678  else if (ARM::DPRRegClass.contains(Reg))
1679  NumFPRSpills += 2;
1680  else if (ARM::QPRRegClass.contains(Reg))
1681  NumFPRSpills += 4;
1682  }
1683  continue;
1684  }
1685 
1686  if (Spilled) {
1687  NumGPRSpills++;
1688 
1689  if (!STI.splitFramePushPop(MF)) {
1690  if (Reg == ARM::LR)
1691  LRSpilled = true;
1692  CS1Spilled = true;
1693  continue;
1694  }
1695 
1696  // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
1697  switch (Reg) {
1698  case ARM::LR:
1699  LRSpilled = true;
1700  LLVM_FALLTHROUGH;
1701  case ARM::R0: case ARM::R1:
1702  case ARM::R2: case ARM::R3:
1703  case ARM::R4: case ARM::R5:
1704  case ARM::R6: case ARM::R7:
1705  CS1Spilled = true;
1706  break;
1707  default:
1708  break;
1709  }
1710  } else {
1711  if (!STI.splitFramePushPop(MF)) {
1712  UnspilledCS1GPRs.push_back(Reg);
1713  continue;
1714  }
1715 
1716  switch (Reg) {
1717  case ARM::R0: case ARM::R1:
1718  case ARM::R2: case ARM::R3:
1719  case ARM::R4: case ARM::R5:
1720  case ARM::R6: case ARM::R7:
1721  case ARM::LR:
1722  UnspilledCS1GPRs.push_back(Reg);
1723  break;
1724  default:
1725  UnspilledCS2GPRs.push_back(Reg);
1726  break;
1727  }
1728  }
1729  }
1730 
1731  bool ForceLRSpill = false;
1732  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1733  unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
1734  // Force LR to be spilled if the Thumb function size is > 2048. This enables
1735  // the use of BL to implement a far jump. If it turns out that it's not
1736  // needed then the branch fix-up path will undo it.
1737  if (FnSize >= (1 << 11)) {
1738  CanEliminateFrame = false;
1739  ForceLRSpill = true;
1740  }
1741  }
1742 
1743  // If any of the stack slot references may be out of range of an immediate
1744  // offset, make sure a register (or a spill slot) is available for the
1745  // register scavenger. Note that if we're indexing off the frame pointer, the
1746  // effective stack size is 4 bytes larger since the FP points to the stack
1747  // slot of the previous FP. Also, if we have variable sized objects in the
1748  // function, stack slot references will often be negative, and some of
1749  // our instructions are positive-offset only, so conservatively consider
1750  // that case to want a spill slot (or register) as well. Similarly, if
1751  // the function adjusts the stack pointer during execution and the
1752  // adjustments aren't already part of our stack size estimate, our offset
1753  // calculations may be off, so be conservative.
1754  // FIXME: We could add logic to be more precise about negative offsets
1755  // and which instructions will need a scratch register for them. Is it
1756  // worth the effort and added fragility?
1757  unsigned EstimatedStackSize =
1758  MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
1759 
1760  // Determine biggest (positive) SP offset in MachineFrameInfo.
1761  int MaxFixedOffset = 0;
1762  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
1763  int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
1764  MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
1765  }
1766 
1767  bool HasFP = hasFP(MF);
1768  if (HasFP) {
1769  if (AFI->hasStackFrame())
1770  EstimatedStackSize += 4;
1771  } else {
1772  // If FP is not used, SP will be used to access arguments, so count the
1773  // size of arguments into the estimation.
1774  EstimatedStackSize += MaxFixedOffset;
1775  }
1776  EstimatedStackSize += 16; // For possible paddings.
1777 
1778  unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
1779  int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
1780  bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
1781  MFI.hasVarSizedObjects() ||
1782  (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
1783  // For large argument stacks, fp-relative addressing may overflow.
1784  (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
1785  if (BigFrameOffsets ||
1786  !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1787  AFI->setHasStackFrame(true);
1788 
1789  if (HasFP) {
1790  SavedRegs.set(FramePtr);
1791  // If the frame pointer is required by the ABI, also spill LR so that we
1792  // emit a complete frame record.
1793  if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
1794  SavedRegs.set(ARM::LR);
1795  LRSpilled = true;
1796  NumGPRSpills++;
1797  auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
1798  if (LRPos != UnspilledCS1GPRs.end())
1799  UnspilledCS1GPRs.erase(LRPos);
1800  }
1801  auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
1802  if (FPPos != UnspilledCS1GPRs.end())
1803  UnspilledCS1GPRs.erase(FPPos);
1804  NumGPRSpills++;
1805  if (FramePtr == ARM::R7)
1806  CS1Spilled = true;
1807  }
1808 
1809  // This is true when we inserted a spill for an unused register that can now
1810  // be used for register scavenging.
1811  bool ExtraCSSpill = false;
1812 
1813  if (AFI->isThumb1OnlyFunction()) {
1814  // For Thumb1-only targets, we need some low registers when we save and
1815  // restore the high registers (which aren't allocatable, but could be
1816  // used by inline assembly) because the push/pop instructions cannot
1817  // access high registers. If necessary, we might need to push more low
1818  // registers to ensure that there is at least one free that can be used
1819  // for the saving & restoring, and preferably we should ensure that as
1820  // many as are needed are available so that fewer push/pop instructions
1821  // are required.
1822 
1823  // Low registers which are not currently pushed, but could be (r4-r7).
1824  SmallVector<unsigned, 4> AvailableRegs;
1825 
1826  // Unused argument registers (r0-r3) can be clobbered in the prologue for
1827  // free.
1828  int EntryRegDeficit = 0;
1829  for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
1830  if (!MF.getRegInfo().isLiveIn(Reg)) {
1831  --EntryRegDeficit;
1832  LLVM_DEBUG(dbgs()
1833  << printReg(Reg, TRI)
1834  << " is unused argument register, EntryRegDeficit = "
1835  << EntryRegDeficit << "\n");
1836  }
1837  }
1838 
1839  // Unused return registers can be clobbered in the epilogue for free.
1840  int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
1841  LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
1842  << " return regs used, ExitRegDeficit = "
1843  << ExitRegDeficit << "\n");
1844 
1845  int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
1846  LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
1847 
1848  // r4-r6 can be used in the prologue if they are pushed by the first push
1849  // instruction.
1850  for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
1851  if (SavedRegs.test(Reg)) {
1852  --RegDeficit;
1853  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1854  << " is saved low register, RegDeficit = "
1855  << RegDeficit << "\n");
1856  } else {
1857  AvailableRegs.push_back(Reg);
1858  LLVM_DEBUG(
1859  dbgs()
1860  << printReg(Reg, TRI)
1861  << " is non-saved low register, adding to AvailableRegs\n");
1862  }
1863  }
1864 
1865  // r7 can be used if it is not being used as the frame pointer.
1866  if (!HasFP) {
1867  if (SavedRegs.test(ARM::R7)) {
1868  --RegDeficit;
1869  LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
1870  << RegDeficit << "\n");
1871  } else {
1872  AvailableRegs.push_back(ARM::R7);
1873  LLVM_DEBUG(
1874  dbgs()
1875  << "%r7 is non-saved low register, adding to AvailableRegs\n");
1876  }
1877  }
1878 
1879  // Each of r8-r11 needs to be copied to a low register, then pushed.
1880  for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
1881  if (SavedRegs.test(Reg)) {
1882  ++RegDeficit;
1883  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1884  << " is saved high register, RegDeficit = "
1885  << RegDeficit << "\n");
1886  }
1887  }
1888 
1889  // LR can only be used by PUSH, not POP, and can't be used at all if the
1890  // llvm.returnaddress intrinsic is used. This is only worth doing if we
1891  // are more limited at function entry than exit.
1892  if ((EntryRegDeficit > ExitRegDeficit) &&
1893  !(MF.getRegInfo().isLiveIn(ARM::LR) &&
1894  MF.getFrameInfo().isReturnAddressTaken())) {
1895  if (SavedRegs.test(ARM::LR)) {
1896  --RegDeficit;
1897  LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
1898  << RegDeficit << "\n");
1899  } else {
1900  AvailableRegs.push_back(ARM::LR);
1901  LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
1902  }
1903  }
1904 
1905  // If there are more high registers that need pushing than low registers
1906  // available, push some more low registers so that we can use fewer push
1907  // instructions. This might not reduce RegDeficit all the way to zero,
1908  // because we can only guarantee that r4-r6 are available, but r8-r11 may
1909  // need saving.
1910  LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
1911  for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
1912  unsigned Reg = AvailableRegs.pop_back_val();
1913  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1914  << " to make up reg deficit\n");
1915  SavedRegs.set(Reg);
1916  NumGPRSpills++;
1917  CS1Spilled = true;
1918  assert(!MRI.isReserved(Reg) && "Should not be reserved");
1919  if (!MRI.isPhysRegUsed(Reg))
1920  ExtraCSSpill = true;
1921  UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
1922  if (Reg == ARM::LR)
1923  LRSpilled = true;
1924  }
1925  LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
1926  << "\n");
1927  }
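// Worked example (hypothetical values, not part of the original source): a
// Thumb1 function whose four argument registers are all live-in has
// EntryRegDeficit = 0; returning a single value in r0 gives
// ExitRegDeficit = 1 - 4 = -3, so RegDeficit starts at max(0, -3) = 0. If r8
// and r9 must be saved, RegDeficit rises to 2, and with r4-r6 unsaved (and r7
// taken as the frame pointer) the loop above spills two of the available
// registers so both high registers can be shuffled through low registers in
// the prologue.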
1928 
1929  // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
1930  // restore LR in that case.
1931  bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
1932 
1933  // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
1934  // spill LR as well so we can fold BX_RET into the register restore (LDM).
1935  if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
1936  SavedRegs.set(ARM::LR);
1937  NumGPRSpills++;
1938  SmallVectorImpl<unsigned>::iterator LRPos;
1939  LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
1940  if (LRPos != UnspilledCS1GPRs.end())
1941  UnspilledCS1GPRs.erase(LRPos);
1942 
1943  ForceLRSpill = false;
1944  if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR))
1945  ExtraCSSpill = true;
1946  }
1947 
1948  // If stack and double are 8-byte aligned and we are spilling an odd number
1949  // of GPRs, spill one extra callee save GPR so we won't have to pad between
1950  // the integer and double callee save areas.
1951  LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
1952  unsigned TargetAlign = getStackAlignment();
1953  if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
1954  if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
1955  for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
1956  unsigned Reg = UnspilledCS1GPRs[i];
1957  // Don't spill a high register if the function is Thumb. In the case of
1958  // Windows on ARM, accept R11 (frame pointer).
1959  if (!AFI->isThumbFunction() ||
1960  (STI.isTargetWindows() && Reg == ARM::R11) ||
1961  isARMLowRegister(Reg) ||
1962  (Reg == ARM::LR && !ExpensiveLRRestore)) {
1963  SavedRegs.set(Reg);
1964  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1965  << " to make up alignment\n");
1966  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1967  ExtraCSSpill = true;
1968  break;
1969  }
1970  }
1971  } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
1972  unsigned Reg = UnspilledCS2GPRs.front();
1973  SavedRegs.set(Reg);
1974  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1975  << " to make up alignment\n");
1976  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1977  ExtraCSSpill = true;
1978  }
1979  }
1980 
1981  // Estimate if we might need to scavenge a register at some point in order
1982  // to materialize a stack offset. If so, either spill one additional
1983  // callee-saved register or reserve a special spill slot to facilitate
1984  // register scavenging. Thumb1 needs a spill slot for stack pointer
1985  // adjustments also, even when the frame itself is small.
1986  if (BigFrameOffsets && !ExtraCSSpill) {
1987  // If any non-reserved CS register isn't spilled, just spill one or two
1988  // extra. That should take care of it!
1989  unsigned NumExtras = TargetAlign / 4;
1990  SmallVector<unsigned, 2> Extras;
1991  while (NumExtras && !UnspilledCS1GPRs.empty()) {
1992  unsigned Reg = UnspilledCS1GPRs.back();
1993  UnspilledCS1GPRs.pop_back();
1994  if (!MRI.isReserved(Reg) &&
1995  (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
1996  Reg == ARM::LR)) {
1997  Extras.push_back(Reg);
1998  NumExtras--;
1999  }
2000  }
2001  // For non-Thumb1 functions, also check for hi-reg CS registers
2002  if (!AFI->isThumb1OnlyFunction()) {
2003  while (NumExtras && !UnspilledCS2GPRs.empty()) {
2004  unsigned Reg = UnspilledCS2GPRs.back();
2005  UnspilledCS2GPRs.pop_back();
2006  if (!MRI.isReserved(Reg)) {
2007  Extras.push_back(Reg);
2008  NumExtras--;
2009  }
2010  }
2011  }
2012  if (NumExtras == 0) {
2013  for (unsigned Reg : Extras) {
2014  SavedRegs.set(Reg);
2015  if (!MRI.isPhysRegUsed(Reg))
2016  ExtraCSSpill = true;
2017  }
2018  }
2019  if (!ExtraCSSpill && !AFI->isThumb1OnlyFunction()) {
2020  // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
2021  // closest to SP or frame pointer.
2022  assert(RS && "Register scavenging not provided");
2023  const TargetRegisterClass &RC = ARM::GPRRegClass;
2024  unsigned Size = TRI->getSpillSize(RC);
2025  unsigned Align = TRI->getSpillAlignment(RC);
2026  RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2027  }
2028  }
2029  }
2030 
2031  if (ForceLRSpill) {
2032  SavedRegs.set(ARM::LR);
2033  AFI->setLRIsSpilledForFarJump(true);
2034  }
2035  AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2036 }
2037 
2038 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2039  MachineFunction &MF, MachineBasicBlock &MBB,
2040  MachineBasicBlock::iterator I) const {
2041  const ARMBaseInstrInfo &TII =
2042  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2043  if (!hasReservedCallFrame(MF)) {
2044  // If we have alloca, convert as follows:
2045  // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2046  // ADJCALLSTACKUP -> add, sp, sp, amount
2047  MachineInstr &Old = *I;
2048  DebugLoc dl = Old.getDebugLoc();
2049  unsigned Amount = TII.getFrameSize(Old);
2050  if (Amount != 0) {
2051  // We need to keep the stack aligned properly. To do this, we round the
2052  // amount of space needed for the outgoing arguments up to the next
2053  // alignment boundary.
2054  Amount = alignSPAdjust(Amount);
2055 
2056  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2057  assert(!AFI->isThumb1OnlyFunction() &&
2058  "This eliminateCallFramePseudoInstr does not support Thumb1!");
2059  bool isARM = !AFI->isThumbFunction();
2060 
2061  // Replace the pseudo instruction with a new instruction...
2062  unsigned Opc = Old.getOpcode();
2063  int PIdx = Old.findFirstPredOperandIdx();
2064  ARMCC::CondCodes Pred =
2065  (PIdx == -1) ? ARMCC::AL
2066  : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
2067  unsigned PredReg = TII.getFramePred(Old);
2068  if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2069  emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2070  Pred, PredReg);
2071  } else {
2072  assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2073  emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2074  Pred, PredReg);
2075  }
2076  }
2077  }
2078  return MBB.erase(I);
2079 }
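// Illustrative example (not part of the original source): with an 8-byte
// stack alignment, a call site needing 12 bytes of outgoing arguments is
// lowered roughly as
//   ADJCALLSTACKDOWN 12  ->  sub sp, sp, #16
//   ADJCALLSTACKUP   12  ->  add sp, sp, #16
// since alignSPAdjust rounds the adjustment up to the alignment boundary
// before emitSPUpdate materializes it.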
2080 
2081 /// Get the minimum constant for ARM that is greater than or equal to the
2082 /// argument. In ARM, constants can have any value that can be produced by
2083 /// rotating an 8-bit value to the right by an even number of bits within a
2084 /// 32-bit word.
2085 static uint32_t alignToARMConstant(uint32_t Value) {
2086  unsigned Shifted = 0;
2087 
2088  if (Value == 0)
2089  return 0;
2090 
2091  while (!(Value & 0xC0000000)) {
2092  Value = Value << 2;
2093  Shifted += 2;
2094  }
2095 
2096  bool Carry = (Value & 0x00FFFFFF);
2097  Value = ((Value & 0xFF000000) >> 24) + Carry;
2098 
2099  if (Value & 0x0000100)
2100  Value = Value & 0x000001FC;
2101 
2102  if (Shifted > 24)
2103  Value = Value >> (Shifted - 24);
2104  else
2105  Value = Value << (24 - Shifted);
2106 
2107  return Value;
2108 }
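// Worked example (not part of the original source), tracing the routine above
// for Value = 0x1234:
//   - shift left by two until bit 31 or 30 is set: Shifted = 18, Value = 0x48D00000
//   - the low 24 bits are non-zero, so Carry = 1 and Value = 0x48 + 1 = 0x49
//   - Shifted <= 24, so the result is 0x49 << (24 - 18) = 0x1240
// 0x1240 is the smallest ARM modified-immediate (an 8-bit value rotated right
// by an even amount) that is >= 0x1234, which lets the segmented-stack code
// below materialize the rounded size with a single mov/sub.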
2109 
2110 // The stack limit in the TCB is set to this many bytes above the actual
2111 // stack limit.
2112 static const uint64_t kSplitStackAvailable = 256;
2113 
2114 // Adjust the function prologue to enable split stacks. This currently only
2115 // supports android and linux.
2116 //
2117 // The ABI of the segmented stack prologue was chosen somewhat arbitrarily, but
2118 // must be well defined in order to allow for consistent implementations of the
2119 // __morestack helper function. The ABI is also not a normal ABI in that it
2120 // doesn't follow the normal calling conventions because this allows the
2121 // prologue of each function to be optimized further.
2122 //
2123 // Currently, the ABI looks like (when calling __morestack)
2124 //
2125 // * r4 holds the minimum stack size requested for this function call
2126 // * r5 holds the stack size of the arguments to the function
2127 // * the beginning of the function is 3 instructions after the call to
2128 // __morestack
2129 //
2130 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2131 // place the arguments on to the new stack, and the 3-instruction knowledge to
2132 // jump directly to the body of the function when working on the new stack.
2133 //
2134 // An old (and possibly no longer compatible) implementation of __morestack for
2135 // ARM can be found at [1].
2136 //
2137 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
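// Rough sketch (illustrative only; ARM mode, CFI directives and block
// boundaries omitted) of the sequence the code below emits, where
// AlignedStackSize / AlignedArgSize stand for the rounded stack and argument
// sizes computed further down:
//
//   push  {r4, r5}                   @ save the scratch registers
//   sub   r5, sp, #AlignedStackSize  @ (just "mov r5, sp" when size < 256)
//   mrc   p15, #0, r4, c13, c0, #3   @ TLS base address
//   ldr   r4, [r4, #4*TlsOffset]     @ load the stack limit
//   cmp   r4, r5
//   blo   post                       @ enough stack, skip __morestack
//   mov   r4, #AlignedStackSize      @ arguments for __morestack
//   mov   r5, #AlignedArgSize
//   push  {lr}
//   bl    __morestack
//   pop   {lr}
//   pop   {r4, r5}
//   bx    lr                         @ return to the original caller
// post:
//   pop   {r4, r5}                   @ fall through into the normal prologue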
2138 void ARMFrameLowering::adjustForSegmentedStacks(
2139  MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2140  unsigned Opcode;
2141  unsigned CFIIndex;
2142  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2143  bool Thumb = ST->isThumb();
2144 
2145  // Sadly, this currently doesn't support varargs or platforms other than
2146  // android/linux. Note that thumb1/thumb2 are supported on android/linux.
2147  if (MF.getFunction().isVarArg())
2148  report_fatal_error("Segmented stacks do not support vararg functions.");
2149  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2150  report_fatal_error("Segmented stacks not supported on this platform.");
2151 
2152  MachineFrameInfo &MFI = MF.getFrameInfo();
2153  MachineModuleInfo &MMI = MF.getMMI();
2154  MCContext &Context = MMI.getContext();
2155  const MCRegisterInfo *MRI = Context.getRegisterInfo();
2156  const ARMBaseInstrInfo &TII =
2157  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2158  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
2159  DebugLoc DL;
2160 
2161  uint64_t StackSize = MFI.getStackSize();
2162 
2163  // Do not generate a prologue for leaf functions with a stack of size zero.
2164  // For non-leaf functions we have to allow for the possibility that the
2165  // call is to a non-split function, as in PR37807. This function could also
2166  // take the address of a non-split function. When the linker tries to adjust
2167  // its non-existent prologue, it would fail with an error. Mark the object
2168  // file so that such failures are not errors. See this Go language bug-report
2169  // https://go-review.googlesource.com/c/go/+/148819/
2170  if (StackSize == 0 && !MFI.hasTailCall()) {
2171  MF.getMMI().setHasNosplitStack(true);
2172  return;
2173  }
2174 
2175  // Use R4 and R5 as scratch registers.
2176  // We save R4 and R5 before use and restore them before leaving the function.
2177  unsigned ScratchReg0 = ARM::R4;
2178  unsigned ScratchReg1 = ARM::R5;
2179  uint64_t AlignedStackSize;
2180 
2181  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2182  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2183  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
2184  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
2185  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
2186 
2187  // Grab everything that reaches PrologueMBB to update their liveness as well.
2188  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2189  SmallVector<MachineBasicBlock *, 8> WalkList;
2190  WalkList.push_back(&PrologueMBB);
2191 
2192  do {
2193  MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2194  for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2195  if (BeforePrologueRegion.insert(PredBB).second)
2196  WalkList.push_back(PredBB);
2197  }
2198  } while (!WalkList.empty());
2199 
2200  // The order in that list is important.
2201  // The blocks will all be inserted before PrologueMBB using that order.
2202  // Therefore the block that should appear first in the CFG should appear
2203  // first in the list.
2204  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2205  PostStackMBB};
2206 
2207  for (MachineBasicBlock *B : AddedBlocks)
2208  BeforePrologueRegion.insert(B);
2209 
2210  for (const auto &LI : PrologueMBB.liveins()) {
2211  for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2212  PredBB->addLiveIn(LI);
2213  }
2214 
2215  // Remove the newly added blocks from the list, since we know
2216  // we do not have to do the following updates for them.
2217  for (MachineBasicBlock *B : AddedBlocks) {
2218  BeforePrologueRegion.erase(B);
2219  MF.insert(PrologueMBB.getIterator(), B);
2220  }
2221 
2222  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2223  // Make sure the LiveIns are still sorted and unique.
2224  MBB->sortUniqueLiveIns();
2225  // Replace the edges to PrologueMBB by edges to the sequences
2226  // we are about to add.
2227  MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2228  }
2229 
2230  // Round the required stack size up so it is encodable as an ARM constant.
2231  AlignedStackSize = alignToARMConstant(StackSize);
2232 
2233  // When the frame size is less than 256 we just compare the stack
2234  // boundary directly to the value of the stack pointer, per gcc.
2235  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
2236 
2237  // We will use two of the callee save registers as scratch registers so we
2238  // need to save those registers onto the stack.
2239  // We will use SR0 to hold stack limit and SR1 to hold the stack size
2240  // requested and arguments for __morestack().
2241  // SR0: Scratch Register #0
2242  // SR1: Scratch Register #1
2243  // push {SR0, SR1}
2244  if (Thumb) {
2245  BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
2246  .add(predOps(ARMCC::AL))
2247  .addReg(ScratchReg0)
2248  .addReg(ScratchReg1);
2249  } else {
2250  BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
2251  .addReg(ARM::SP, RegState::Define)
2252  .addReg(ARM::SP)
2253  .add(predOps(ARMCC::AL))
2254  .addReg(ScratchReg0)
2255  .addReg(ScratchReg1);
2256  }
2257 
2258  // Emit the relevant DWARF information about the change in stack pointer as
2259  // well as where to find both r4 and r5 (the callee-save registers)
2260  CFIIndex =
2261  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
2262  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2263  .addCFIIndex(CFIIndex);
2264  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2265  nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
2266  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2267  .addCFIIndex(CFIIndex);
2268  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2269  nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
2270  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2271  .addCFIIndex(CFIIndex);
2272 
2273  // mov SR1, sp
2274  if (Thumb) {
2275  BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
2276  .addReg(ARM::SP)
2277  .add(predOps(ARMCC::AL));
2278  } else if (CompareStackPointer) {
2279  BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
2280  .addReg(ARM::SP)
2281  .add(predOps(ARMCC::AL))
2282  .add(condCodeOp());
2283  }
2284 
2285  // sub SR1, sp, #StackSize
2286  if (!CompareStackPointer && Thumb) {
2287  BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
2288  .add(condCodeOp())
2289  .addReg(ScratchReg1)
2290  .addImm(AlignedStackSize)
2291  .add(predOps(ARMCC::AL));
2292  } else if (!CompareStackPointer) {
2293  BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
2294  .addReg(ARM::SP)
2295  .addImm(AlignedStackSize)
2296  .add(predOps(ARMCC::AL))
2297  .add(condCodeOp());
2298  }
2299 
2300  if (Thumb && ST->isThumb1Only()) {
2301  unsigned PCLabelId = ARMFI->createPICLabelUId();
2302  ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
2303  MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
2304  MachineConstantPool *MCP = MF.getConstantPool();
2305  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
2306 
2307  // ldr SR0, [pc, offset(STACK_LIMIT)]
2308  BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
2309  .addConstantPoolIndex(CPI)
2310  .add(predOps(ARMCC::AL));
2311 
2312  // ldr SR0, [SR0]
2313  BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
2314  .addReg(ScratchReg0)
2315  .addImm(0)
2316  .add(predOps(ARMCC::AL));
2317  } else {
2318  // Get TLS base address from the coprocessor
2319  // mrc p15, #0, SR0, c13, c0, #3
2320  BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
2321  .addImm(15)
2322  .addImm(0)
2323  .addImm(13)
2324  .addImm(0)
2325  .addImm(3)
2326  .add(predOps(ARMCC::AL));
2327 
2328  // Use the last TLS slot on android and a private field of the TCB on linux.
2329  assert(ST->isTargetAndroid() || ST->isTargetLinux());
2330  unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
2331 
2332  // Get the stack limit from the right offset
2333  // ldr SR0, [sr0, #4 * TlsOffset]
2334  BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
2335  .addReg(ScratchReg0)
2336  .addImm(4 * TlsOffset)
2337  .add(predOps(ARMCC::AL));
2338  }
2339 
2340  // Compare stack limit with stack size requested.
2341  // cmp SR0, SR1
2342  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
2343  BuildMI(GetMBB, DL, TII.get(Opcode))
2344  .addReg(ScratchReg0)
2345  .addReg(ScratchReg1)
2346  .add(predOps(ARMCC::AL));
2347 
2348  // This jump is taken if StackLimit < SP - stack required.
2349  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
2350  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
2351  .addImm(ARMCC::LO)
2352  .addReg(ARM::CPSR);
2353 
2354 
2355  // Calling __morestack(StackSize, Size of stack arguments).
2356  // __morestack expects the requested stack size in SR0 (r4)
2357  // and the size of the stack arguments in SR1 (r5).
2358 
2359  // Pass the first argument to __morestack in Scratch Register #0:
2360  // the amount of stack required.
2361  if (Thumb) {
2362  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
2363  .add(condCodeOp())
2364  .addImm(AlignedStackSize)
2365  .add(predOps(ARMCC::AL));
2366  } else {
2367  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
2368  .addImm(AlignedStackSize)
2369  .add(predOps(ARMCC::AL))
2370  .add(condCodeOp());
2371  }
2372  // Pass the second argument to __morestack in Scratch Register #1:
2373  // the amount of stack consumed to hold the function arguments.
2374  if (Thumb) {
2375  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
2376  .add(condCodeOp())
2377  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2378  .add(predOps(ARMCC::AL));
2379  } else {
2380  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
2381  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2382  .add(predOps(ARMCC::AL))
2383  .add(condCodeOp());
2384  }
2385 
2386  // push {lr} - Save return address of this function.
2387  if (Thumb) {
2388  BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
2389  .add(predOps(ARMCC::AL))
2390  .addReg(ARM::LR);
2391  } else {
2392  BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
2393  .addReg(ARM::SP, RegState::Define)
2394  .addReg(ARM::SP)
2395  .add(predOps(ARMCC::AL))
2396  .addReg(ARM::LR);
2397  }
2398 
2399  // Emit the DWARF info about the change in stack as well as where to find the
2400  // previous link register
2401  CFIIndex =
2402  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
2403  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2404  .addCFIIndex(CFIIndex);
2405  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2406  nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
2407  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2408  .addCFIIndex(CFIIndex);
2409 
2410  // Call __morestack().
2411  if (Thumb) {
2412  BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
2413  .add(predOps(ARMCC::AL))
2414  .addExternalSymbol("__morestack");
2415  } else {
2416  BuildMI(AllocMBB, DL, TII.get(ARM::BL))
2417  .addExternalSymbol("__morestack");
2418  }
2419 
2420  // pop {lr} - Restore return address of this original function.
2421  if (Thumb) {
2422  if (ST->isThumb1Only()) {
2423  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2424  .add(predOps(ARMCC::AL))
2425  .addReg(ScratchReg0);
2426  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
2427  .addReg(ScratchReg0)
2428  .add(predOps(ARMCC::AL));
2429  } else {
2430  BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
2431  .addReg(ARM::LR, RegState::Define)
2432  .addReg(ARM::SP, RegState::Define)
2433  .addReg(ARM::SP)
2434  .addImm(4)
2435  .add(predOps(ARMCC::AL));
2436  }
2437  } else {
2438  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2439  .addReg(ARM::SP, RegState::Define)
2440  .addReg(ARM::SP)
2441  .add(predOps(ARMCC::AL))
2442  .addReg(ARM::LR);
2443  }
2444 
2445  // Restore SR0 and SR1 in case __morestack() was called.
2446  // __morestack() will skip PostStackMBB block so we need to restore
2447  // scratch registers from here.
2448  // pop {SR0, SR1}
2449  if (Thumb) {
2450  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2451  .add(predOps(ARMCC::AL))
2452  .addReg(ScratchReg0)
2453  .addReg(ScratchReg1);
2454  } else {
2455  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2456  .addReg(ARM::SP, RegState::Define)
2457  .addReg(ARM::SP)
2458  .add(predOps(ARMCC::AL))
2459  .addReg(ScratchReg0)
2460  .addReg(ScratchReg1);
2461  }
2462 
2463  // Update the CFA offset now that we've popped
2464  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2465  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2466  .addCFIIndex(CFIIndex);
2467 
2468  // Return from this function.
2469  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
2470 
2471  // Restore SR0 and SR1 in case __morestack() was not called.
2472  // pop {SR0, SR1}
2473  if (Thumb) {
2474  BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
2475  .add(predOps(ARMCC::AL))
2476  .addReg(ScratchReg0)
2477  .addReg(ScratchReg1);
2478  } else {
2479  BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
2480  .addReg(ARM::SP, RegState::Define)
2481  .addReg(ARM::SP)
2482  .add(predOps(ARMCC::AL))
2483  .addReg(ScratchReg0)
2484  .addReg(ScratchReg1);
2485  }
2486 
2487  // Update the CFA offset now that we've popped
2488  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2489  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2490  .addCFIIndex(CFIIndex);
2491 
2492  // Tell debuggers that r4 and r5 are now the same as they were in the
2493  // previous function, that they're the "Same Value".
2494  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2495  nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
2496  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2497  .addCFIIndex(CFIIndex);
2498  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2499  nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
2500  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2501  .addCFIIndex(CFIIndex);
2502 
2503  // Organizing MBB lists
2504  PostStackMBB->addSuccessor(&PrologueMBB);
2505 
2506  AllocMBB->addSuccessor(PostStackMBB);
2507 
2508  GetMBB->addSuccessor(PostStackMBB);
2509  GetMBB->addSuccessor(AllocMBB);
2510 
2511  McrMBB->addSuccessor(GetMBB);
2512 
2513  PrevStackMBB->addSuccessor(McrMBB);
2514 
2515 #ifdef EXPENSIVE_CHECKS
2516  MF.verify();
2517 #endif
2518 }