ARMFrameLowering.cpp
1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the ARM implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMFrameLowering.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMConstantPoolValue.h"
17 #include "ARMMachineFunctionInfo.h"
18 #include "ARMSubtarget.h"
21 #include "Utils/ARMBaseInfo.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/CallingConv.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/MC/MCContext.h"
45 #include "llvm/MC/MCDwarf.h"
46 #include "llvm/MC/MCInstrDesc.h"
47 #include "llvm/MC/MCRegisterInfo.h"
48 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/Debug.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <cstddef>
60 #include <cstdint>
61 #include <iterator>
62 #include <utility>
63 #include <vector>
64 
65 #define DEBUG_TYPE "arm-frame-lowering"
66 
67 using namespace llvm;
68 
69 static cl::opt<bool>
70 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
71  cl::desc("Align ARM NEON spills in prolog and epilog"));
72 
73 static MachineBasicBlock::iterator
74 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
75  unsigned NumAlignedDPRCS2Regs);
76 
77 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
78  : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
79  STI(sti) {}
80 
81 bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
82  // iOS always has a FP for backtracking, force other targets to keep their FP
83  // when doing FastISel. The emitted code is currently superior, and in cases
84  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
85  return MF.getSubtarget<ARMSubtarget>().useFastISel();
86 }
87 
88 /// Returns true if the target can safely skip saving callee-saved registers
89 /// for noreturn nounwind functions.
90 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
91  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
92  MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
93  !MF.getFunction().hasFnAttribute(Attribute::UWTable));
94 
95  // Frame pointer and link register are not treated as normal CSR, thus we
96  // can always skip CSR saves for nonreturning functions.
97  return true;
98 }
99 
100 /// hasFP - Return true if the specified function should have a dedicated frame
101 /// pointer register. This is true if the function has variable sized allocas
102 /// or if frame pointer elimination is disabled.
103 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
104  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
105  const MachineFrameInfo &MFI = MF.getFrameInfo();
106 
107  // ABI-required frame pointer.
108  if (MF.getTarget().Options.DisableFramePointerElim(MF))
109  return true;
110 
111  // Frame pointer required for use within this function.
112  return (RegInfo->needsStackRealignment(MF) ||
113  MFI.hasVarSizedObjects() ||
114  MFI.isFrameAddressTaken());
115 }
116 
117 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
118 /// not required, we reserve argument space for call sites in the function
119 /// immediately on entry to the current function. This eliminates the need for
120 /// add/sub sp brackets around call sites. Returns true if the call frame is
121 /// included as part of the stack frame.
122 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
123  const MachineFrameInfo &MFI = MF.getFrameInfo();
124  unsigned CFSize = MFI.getMaxCallFrameSize();
125  // It's not always a good idea to include the call frame as part of the
126  // stack frame. ARM (especially Thumb) has small immediate offsets with
127  // which to address the stack frame, so a large call frame can cause poor
128  // codegen and may even make it impossible to scavenge a register.
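 // For the ARM imm12 case this cap works out to 2047 bytes; larger outgoing
 // argument areas instead get explicit sp adjustments around each call site.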
129  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
130  return false;
131 
132  return !MFI.hasVarSizedObjects();
133 }
134 
135 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
136 /// call frame pseudos can be simplified. Unlike most targets, having a FP
137 /// is not sufficient here since we still may reference some objects via SP
138 /// even when FP is available in Thumb2 mode.
139 bool
140 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
141  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
142 }
143 
144 static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
145  const MCPhysReg *CSRegs) {
146  // Integer spill area is handled with "pop".
147  if (isPopOpcode(MI.getOpcode())) {
148  // The first two operands are predicates. The last two are
149  // imp-def and imp-use of SP. Check everything in between.
150  for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
151  if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
152  return false;
153  return true;
154  }
155  if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
156  MI.getOpcode() == ARM::LDR_POST_REG ||
157  MI.getOpcode() == ARM::t2LDR_POST) &&
158  isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
159  MI.getOperand(1).getReg() == ARM::SP)
160  return true;
161 
162  return false;
163 }
164 
165 static void emitRegPlusImmediate(
166  bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
167  const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
168  unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
169  ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
170  if (isARM)
171  emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
172  Pred, PredReg, TII, MIFlags);
173  else
174  emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
175  Pred, PredReg, TII, MIFlags);
176 }
177 
178 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
179  MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
180  const ARMBaseInstrInfo &TII, int NumBytes,
181  unsigned MIFlags = MachineInstr::NoFlags,
182  ARMCC::CondCodes Pred = ARMCC::AL,
183  unsigned PredReg = 0) {
184  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
185  MIFlags, Pred, PredReg);
186 }
187 
188 static int sizeOfSPAdjustment(const MachineInstr &MI) {
189  int RegSize;
190  switch (MI.getOpcode()) {
191  case ARM::VSTMDDB_UPD:
192  RegSize = 8;
193  break;
194  case ARM::STMDB_UPD:
195  case ARM::t2STMDB_UPD:
196  RegSize = 4;
197  break;
198  case ARM::t2STR_PRE:
199  case ARM::STR_PRE_IMM:
200  return 4;
201  default:
202  llvm_unreachable("Unknown push or pop like instruction");
203  }
204 
205  int count = 0;
206  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
207  // pred) so the list starts at 4.
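 // e.g. a vpush of {d8,d9,d10} (VSTMDDB_UPD) carries three register operands
 // after the sp, sp and predicate operands, so this counts 3 * 8 = 24 bytes.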
208  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
209  count += RegSize;
210  return count;
211 }
212 
213 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
214  size_t StackSizeInBytes) {
215  const MachineFrameInfo &MFI = MF.getFrameInfo();
216  const Function &F = MF.getFunction();
217  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
218  if (F.hasFnAttribute("stack-probe-size"))
219  F.getFnAttribute("stack-probe-size")
220  .getValueAsString()
221  .getAsInteger(0, StackProbeSize);
222  return (StackSizeInBytes >= StackProbeSize) &&
223  !F.hasFnAttribute("no-stack-arg-probe");
224 }
225 
226 namespace {
227 
228 struct StackAdjustingInsts {
229  struct InstInfo {
230  MachineBasicBlock::iterator I;
231  unsigned SPAdjust;
232  bool BeforeFPSet;
233  };
234 
235  SmallVector<InstInfo, 4> Insts;
236 
237  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
238  bool BeforeFPSet = false) {
239  InstInfo Info = {I, SPAdjust, BeforeFPSet};
240  Insts.push_back(Info);
241  }
242 
243  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
244  auto Info =
245  llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
246  assert(Info != Insts.end() && "invalid sp adjusting instruction");
247  Info->SPAdjust += ExtraBytes;
248  }
249 
250  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
251  const ARMBaseInstrInfo &TII, bool HasFP) {
252  MachineFunction &MF = *MBB.getParent();
253  unsigned CFAOffset = 0;
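 // Walk the recorded SP adjustments in program order, accumulating the CFA
 // offset and emitting a .cfi_def_cfa_offset after each one. Once the frame
 // pointer takes over as the CFA base (HasFP), later adjustments are ignored.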
254  for (auto &Info : Insts) {
255  if (HasFP && !Info.BeforeFPSet)
256  return;
257 
258  CFAOffset -= Info.SPAdjust;
259  unsigned CFIIndex = MF.addFrameInst(
260  MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
261  BuildMI(MBB, std::next(Info.I), dl,
262  TII.get(TargetOpcode::CFI_INSTRUCTION))
263  .addCFIIndex(CFIIndex)
264  .setMIFlags(MachineInstr::FrameSetup);
265  }
266  }
267 };
268 
269 } // end anonymous namespace
270 
271 /// Emit an instruction sequence that will align the address in
272 /// register Reg by zero-ing out the lower bits. For versions of the
273 /// architecture that support Neon, this must be done in a single
274 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
275 /// single instruction. That function only gets called when optimizing
276 /// spilling of D registers on a core with the Neon instruction set
277 /// present.
278 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
279  const TargetInstrInfo &TII,
280  MachineBasicBlock &MBB,
281  MachineBasicBlock::iterator MBBI,
282  const DebugLoc &DL, const unsigned Reg,
283  const unsigned Alignment,
284  const bool MustBeSingleInstruction) {
285  const ARMSubtarget &AST =
286  static_cast<const ARMSubtarget &>(MF.getSubtarget());
287  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
288  const unsigned AlignMask = Alignment - 1;
289  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
290  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
291  if (!AFI->isThumbFunction()) {
292  // if the BFC instruction is available, use that to zero the lower
293  // bits:
294  // bfc Reg, #0, log2(Alignment)
295  // otherwise use BIC, if the mask to zero the required number of bits
296  // can be encoded in the bic immediate field
297  // bic Reg, Reg, Alignment-1
298  // otherwise, emit
299  // lsr Reg, Reg, log2(Alignment)
300  // lsl Reg, Reg, log2(Alignment)
301  if (CanUseBFC) {
302  BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
303  .addReg(Reg, RegState::Kill)
304  .addImm(~AlignMask)
305  .add(predOps(ARMCC::AL));
306  } else if (AlignMask <= 255) {
307  BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
308  .addReg(Reg, RegState::Kill)
309  .addImm(AlignMask)
310  .add(predOps(ARMCC::AL))
311  .add(condCodeOp());
312  } else {
313  assert(!MustBeSingleInstruction &&
314  "Shouldn't call emitAligningInstructions demanding a single "
315  "instruction to be emitted for large stack alignment for a target "
316  "without BFC.");
317  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
318  .addReg(Reg, RegState::Kill)
319  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
320  .add(predOps(ARMCC::AL))
321  .add(condCodeOp());
322  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
323  .addReg(Reg, RegState::Kill)
324  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
325  .add(predOps(ARMCC::AL))
326  .add(condCodeOp());
327  }
328  } else {
329  // Since this is only reached for Thumb-2 targets, the BFC instruction
330  // should always be available.
331  assert(CanUseBFC);
332  BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
333  .addReg(Reg, RegState::Kill)
334  .addImm(~AlignMask)
335  .add(predOps(ARMCC::AL));
336  }
337 }
338 
339 /// We need the offset of the frame pointer relative to other MachineFrameInfo
340 /// offsets which are encoded relative to SP at function begin.
341 /// See also emitPrologue() for how the FP is set up.
342 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
343 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
344 /// this to produce a conservative estimate that we check in an assert() later.
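/// For example, with no vararg register save area the estimate is simply
/// -32 (eight 4-byte registers).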
345 static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
346  // This is a conservative estimate: assume the frame pointer is r7 and that
347  // the registers from r8 up to pc ("r15"), i.e. 8 registers, get spilled
348  // before it.
349  return -AFI.getArgRegsSaveSize() - (8 * 4);
349 }
350 
351 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
352  MachineBasicBlock &MBB) const {
353  MachineBasicBlock::iterator MBBI = MBB.begin();
354  MachineFrameInfo &MFI = MF.getFrameInfo();
355  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
356  MachineModuleInfo &MMI = MF.getMMI();
357  MCContext &Context = MMI.getContext();
358  const TargetMachine &TM = MF.getTarget();
359  const MCRegisterInfo *MRI = Context.getRegisterInfo();
360  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
361  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
362  assert(!AFI->isThumb1OnlyFunction() &&
363  "This emitPrologue does not support Thumb1!");
364  bool isARM = !AFI->isThumbFunction();
365  unsigned Align = STI.getFrameLowering()->getStackAlignment();
366  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
367  unsigned NumBytes = MFI.getStackSize();
368  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
369 
370  // Debug location must be unknown since the first debug location is used
371  // to determine the end of the prologue.
372  DebugLoc dl;
373 
374  unsigned FramePtr = RegInfo->getFrameRegister(MF);
375 
376  // Determine the sizes of each callee-save spill areas and record which frame
377  // belongs to which callee-save spill areas.
378  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
379  int FramePtrSpillFI = 0;
380  int D8SpillFI = 0;
381 
382  // All calls are tail calls in GHC calling conv, and functions have no
383  // prologue/epilogue.
384  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
385  return;
386 
387  StackAdjustingInsts DefCFAOffsetCandidates;
388  bool HasFP = hasFP(MF);
389 
390  // Allocate the vararg register save area.
391  if (ArgRegsSaveSize) {
392  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
393  MachineInstr::FrameSetup);
394  DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
395  }
396 
397  if (!AFI->hasStackFrame() &&
398  (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
399  if (NumBytes - ArgRegsSaveSize != 0) {
400  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
401  MachineInstr::FrameSetup);
402  DefCFAOffsetCandidates.addInst(std::prev(MBBI),
403  NumBytes - ArgRegsSaveSize, true);
404  }
405  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
406  return;
407  }
408 
409  // Determine spill area sizes.
410  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
411  unsigned Reg = CSI[i].getReg();
412  int FI = CSI[i].getFrameIdx();
413  switch (Reg) {
414  case ARM::R8:
415  case ARM::R9:
416  case ARM::R10:
417  case ARM::R11:
418  case ARM::R12:
419  if (STI.splitFramePushPop(MF)) {
420  GPRCS2Size += 4;
421  break;
422  }
423  LLVM_FALLTHROUGH;
424  case ARM::R0:
425  case ARM::R1:
426  case ARM::R2:
427  case ARM::R3:
428  case ARM::R4:
429  case ARM::R5:
430  case ARM::R6:
431  case ARM::R7:
432  case ARM::LR:
433  if (Reg == FramePtr)
434  FramePtrSpillFI = FI;
435  GPRCS1Size += 4;
436  break;
437  default:
438  // This is a DPR. Exclude the aligned DPRCS2 spills.
439  if (Reg == ARM::D8)
440  D8SpillFI = FI;
441  if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
442  DPRCSSize += 8;
443  }
444  }
445 
446  // Move past area 1.
447  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
448  if (GPRCS1Size > 0) {
449  GPRCS1Push = LastPush = MBBI++;
450  DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
451  }
452 
453  // Determine starting offsets of spill areas.
454  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
455  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
456  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
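 // DPRGapSize is the padding (0 or 4 bytes) needed so the DPR spill area that
 // sits below the GPR save areas stays 8-byte aligned.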
457  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
458  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
459  int FramePtrOffsetInPush = 0;
460  if (HasFP) {
461  int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
462  assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset &&
463  "Max FP estimation is wrong");
464  FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
465  AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
466  NumBytes);
467  }
468  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
469  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
470  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
471 
472  // Move past area 2.
473  if (GPRCS2Size > 0) {
474  GPRCS2Push = LastPush = MBBI++;
475  DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
476  }
477 
478  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
479  // .cfi_offset operations will reflect that.
480  if (DPRGapSize) {
481  assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
482  if (LastPush != MBB.end() &&
483  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
484  DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
485  else {
486  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
487  MachineInstr::FrameSetup);
488  DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
489  }
490  }
491 
492  // Move past area 3.
493  if (DPRCSSize > 0) {
494  // Since vpush register list cannot have gaps, there may be multiple vpush
495  // instructions in the prologue.
496  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
497  DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
498  LastPush = MBBI++;
499  }
500  }
501 
502  // Move past the aligned DPRCS2 area.
503  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
504  MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
505  // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
506  // leaves the stack pointer pointing to the DPRCS2 area.
507  //
508  // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
509  NumBytes += MFI.getObjectOffset(D8SpillFI);
510  } else
511  NumBytes = DPRCSOffset;
512 
513  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
514  uint32_t NumWords = NumBytes >> 2;
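 // The number of 4-byte words to probe is passed to __chkstk in r4; on return
 // r4 holds the byte count, which the sub from sp below then removes.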
515 
516  if (NumWords < 65536)
517  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
518  .addImm(NumWords)
520  .add(predOps(ARMCC::AL));
521  else
522  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
523  .addImm(NumWords)
525 
526  switch (TM.getCodeModel()) {
527  case CodeModel::Tiny:
528  llvm_unreachable("Tiny code model not available on ARM.");
529  case CodeModel::Small:
530  case CodeModel::Medium:
531  case CodeModel::Kernel:
532  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
534  .addExternalSymbol("__chkstk")
535  .addReg(ARM::R4, RegState::Implicit)
536  .setMIFlags(MachineInstr::FrameSetup);
537  break;
538  case CodeModel::Large:
539  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
540  .addExternalSymbol("__chkstk")
542 
543  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
545  .addReg(ARM::R12, RegState::Kill)
546  .addReg(ARM::R4, RegState::Implicit)
547  .setMIFlags(MachineInstr::FrameSetup);
548  break;
549  }
550 
551  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
552  .addReg(ARM::SP, RegState::Kill)
553  .addReg(ARM::R4, RegState::Kill)
554  .setMIFlags(MachineInstr::FrameSetup)
555  .add(predOps(ARMCC::AL))
556  .add(condCodeOp());
557  NumBytes = 0;
558  }
559 
560  if (NumBytes) {
561  // Adjust SP after all the callee-save spills.
562  if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
563  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
564  DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
565  else {
566  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
567  MachineInstr::FrameSetup);
568  DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
569  }
570 
571  if (HasFP && isARM)
572  // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
573  // Note it's not safe to do this in Thumb2 mode because it would have
574  // taken two instructions:
575  // mov sp, r7
576  // sub sp, #24
577  // If an interrupt is taken between the two instructions, then sp is in
578  // an inconsistent state (pointing to the middle of callee-saved area).
579  // The interrupt handler can end up clobbering the registers.
580  AFI->setShouldRestoreSPFromFP(true);
581  }
582 
583  // Set FP to point to the stack slot that contains the previous FP.
584  // For iOS, FP is R7, which has now been stored in spill area 1.
585  // Otherwise, if this is not iOS, all the callee-saved registers go
586  // into spill area 1, including the FP in R11. In either case, it
587  // is in area one and the adjustment needs to take place just after
588  // that push.
589  if (HasFP) {
590  MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
591  unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
592  emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
593  dl, TII, FramePtr, ARM::SP,
594  PushSize + FramePtrOffsetInPush,
595  MachineInstr::FrameSetup);
596  if (FramePtrOffsetInPush + PushSize != 0) {
597  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
598  nullptr, MRI->getDwarfRegNum(FramePtr, true),
599  -(ArgRegsSaveSize - FramePtrOffsetInPush)));
600  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
601  .addCFIIndex(CFIIndex)
602  .setMIFlags(MachineInstr::FrameSetup);
603  } else {
604  unsigned CFIIndex =
605  MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
606  nullptr, MRI->getDwarfRegNum(FramePtr, true)));
607  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
608  .addCFIIndex(CFIIndex)
609  .setMIFlags(MachineInstr::FrameSetup);
610  }
611  }
612 
613  // Now that the prologue's actual instructions are finalised, we can insert
614  // the necessary DWARF cf instructions to describe the situation. Start by
615  // recording where each register ended up:
616  if (GPRCS1Size > 0) {
617  MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
618  int CFIIndex;
619  for (const auto &Entry : CSI) {
620  unsigned Reg = Entry.getReg();
621  int FI = Entry.getFrameIdx();
622  switch (Reg) {
623  case ARM::R8:
624  case ARM::R9:
625  case ARM::R10:
626  case ARM::R11:
627  case ARM::R12:
628  if (STI.splitFramePushPop(MF))
629  break;
630  LLVM_FALLTHROUGH;
631  case ARM::R0:
632  case ARM::R1:
633  case ARM::R2:
634  case ARM::R3:
635  case ARM::R4:
636  case ARM::R5:
637  case ARM::R6:
638  case ARM::R7:
639  case ARM::LR:
640  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
641  nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
642  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
643  .addCFIIndex(CFIIndex)
644  .setMIFlags(MachineInstr::FrameSetup);
645  break;
646  }
647  }
648  }
649 
650  if (GPRCS2Size > 0) {
651  MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
652  for (const auto &Entry : CSI) {
653  unsigned Reg = Entry.getReg();
654  int FI = Entry.getFrameIdx();
655  switch (Reg) {
656  case ARM::R8:
657  case ARM::R9:
658  case ARM::R10:
659  case ARM::R11:
660  case ARM::R12:
661  if (STI.splitFramePushPop(MF)) {
662  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
663  unsigned Offset = MFI.getObjectOffset(FI);
664  unsigned CFIIndex = MF.addFrameInst(
665  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
666  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
667  .addCFIIndex(CFIIndex)
668  .setMIFlags(MachineInstr::FrameSetup);
669  }
670  break;
671  }
672  }
673  }
674 
675  if (DPRCSSize > 0) {
676  // Since vpush register list cannot have gaps, there may be multiple vpush
677  // instructions in the prologue.
678  MachineBasicBlock::iterator Pos = std::next(LastPush);
679  for (const auto &Entry : CSI) {
680  unsigned Reg = Entry.getReg();
681  int FI = Entry.getFrameIdx();
682  if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
683  (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
684  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
685  unsigned Offset = MFI.getObjectOffset(FI);
686  unsigned CFIIndex = MF.addFrameInst(
687  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
688  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
689  .addCFIIndex(CFIIndex)
690  .setMIFlags(MachineInstr::FrameSetup);
691  }
692  }
693  }
694 
695  // Now we can emit descriptions of where the canonical frame address was
696  // throughout the process. If we have a frame pointer, it takes over the job
697  // half-way through, so only the first few .cfi_def_cfa_offset instructions
698  // actually get emitted.
699  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
700 
701  if (STI.isTargetELF() && hasFP(MF))
702  MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
703  AFI->getFramePtrSpillOffset());
704 
705  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
706  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
707  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
708  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
709 
710  // If we need dynamic stack realignment, do it here. Be paranoid and make
711  // sure if we also have VLAs, we have a base pointer for frame access.
712  // If aligned NEON registers were spilled, the stack has already been
713  // realigned.
714  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
715  unsigned MaxAlign = MFI.getMaxAlignment();
716  assert(!AFI->isThumb1OnlyFunction());
717  if (!AFI->isThumbFunction()) {
718  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
719  false);
720  } else {
721  // We cannot use sp as source/dest register here, thus we're using r4 to
722  // perform the calculations. We're emitting the following sequence:
723  // mov r4, sp
724  // -- use emitAligningInstructions to produce best sequence to zero
725  // -- out lower bits in r4
726  // mov sp, r4
727  // FIXME: It will be better just to find spare register here.
728  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
729  .addReg(ARM::SP, RegState::Kill)
730  .add(predOps(ARMCC::AL));
731  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
732  false);
733  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
734  .addReg(ARM::R4, RegState::Kill)
735  .add(predOps(ARMCC::AL));
736  }
737 
738  AFI->setShouldRestoreSPFromFP(true);
739  }
740 
741  // If we need a base pointer, set it up here. It's whatever the value
742  // of the stack pointer is at this point. Any variable size objects
743  // will be allocated after this, so we can still use the base pointer
744  // to reference locals.
745  // FIXME: Clarify FrameSetup flags here.
746  if (RegInfo->hasBasePointer(MF)) {
747  if (isARM)
748  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
749  .addReg(ARM::SP)
751  .add(condCodeOp());
752  else
753  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
754  .addReg(ARM::SP)
755  .add(predOps(ARMCC::AL));
756  }
757 
758  // If the frame has variable sized objects then the epilogue must restore
759  // the sp from fp. We can assume there's an FP here since hasFP already
760  // checks for hasVarSizedObjects.
761  if (MFI.hasVarSizedObjects())
762  AFI->setShouldRestoreSPFromFP(true);
763 }
764 
765 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
766  MachineBasicBlock &MBB) const {
767  MachineFrameInfo &MFI = MF.getFrameInfo();
768  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
769  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
770  const ARMBaseInstrInfo &TII =
771  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
772  assert(!AFI->isThumb1OnlyFunction() &&
773  "This emitEpilogue does not support Thumb1!");
774  bool isARM = !AFI->isThumbFunction();
775 
776  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
777  int NumBytes = (int)MFI.getStackSize();
778  unsigned FramePtr = RegInfo->getFrameRegister(MF);
779 
780  // All calls are tail calls in GHC calling conv, and functions have no
781  // prologue/epilogue.
782  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
783  return;
784 
785  // First, position ourselves at the first (topmost) terminator instruction.
786  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
787  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
788 
789  if (!AFI->hasStackFrame()) {
790  if (NumBytes - ArgRegsSaveSize != 0)
791  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
792  } else {
793  // Unwind MBBI to point to first LDR / VLDRD.
794  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
795  if (MBBI != MBB.begin()) {
796  do {
797  --MBBI;
798  } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
799  if (!isCSRestore(*MBBI, TII, CSRegs))
800  ++MBBI;
801  }
802 
803  // Move SP to start of FP callee save spill area.
804  NumBytes -= (ArgRegsSaveSize +
805  AFI->getGPRCalleeSavedArea1Size() +
806  AFI->getGPRCalleeSavedArea2Size() +
807  AFI->getDPRCalleeSavedGapSize() +
808  AFI->getDPRCalleeSavedAreaSize());
809 
810  // Reset SP based on frame pointer only if the stack frame extends beyond
811  // frame pointer stack slot or target is ELF and the function has FP.
812  if (AFI->shouldRestoreSPFromFP()) {
813  NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
814  if (NumBytes) {
815  if (isARM)
816  emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
817  ARMCC::AL, 0, TII);
818  else {
819  // It's not possible to restore SP from FP in a single instruction.
820  // For iOS, this looks like:
821  // mov sp, r7
822  // sub sp, #24
823  // This is bad, if an interrupt is taken after the mov, sp is in an
824  // inconsistent state.
825  // Use the first callee-saved register as a scratch register.
826  assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
827  "No scratch register to restore SP from FP!");
828  emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
829  ARMCC::AL, 0, TII);
830  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
831  .addReg(ARM::R4)
832  .add(predOps(ARMCC::AL));
833  }
834  } else {
835  // Thumb2 or ARM.
836  if (isARM)
837  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
838  .addReg(FramePtr)
840  .add(condCodeOp());
841  else
842  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
843  .addReg(FramePtr)
844  .add(predOps(ARMCC::AL));
845  }
846  } else if (NumBytes &&
847  !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
848  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
849 
850  // Increment past our save areas.
851  if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
852  MBBI++;
853  // Since vpop register list cannot have gaps, there may be multiple vpop
854  // instructions in the epilogue.
855  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
856  MBBI++;
857  }
858  if (AFI->getDPRCalleeSavedGapSize()) {
859  assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
860  "unexpected DPR alignment gap");
861  emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
862  }
863 
864  if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
865  if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
866  }
867 
868  if (ArgRegsSaveSize)
869  emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
870 }
871 
872 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
873 /// debug info. It's the same as what we use for resolving the code-gen
874 /// references for now. FIXME: This can go wrong when references are
875 /// SP-relative and simple call frames aren't used.
876 int
877 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
878  unsigned &FrameReg) const {
879  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
880 }
881 
882 int
883 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
884  int FI, unsigned &FrameReg,
885  int SPAdj) const {
886  const MachineFrameInfo &MFI = MF.getFrameInfo();
887  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
888  MF.getSubtarget().getRegisterInfo());
889  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
890  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
891  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
892  bool isFixed = MFI.isFixedObjectIndex(FI);
893 
894  FrameReg = ARM::SP;
895  Offset += SPAdj;
896 
897  // SP can move around if there are allocas. We may also lose track of SP
898  // when emergency spilling inside a non-reserved call frame setup.
899  bool hasMovingSP = !hasReservedCallFrame(MF);
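 // Resolution order below: realigned frames use FP for fixed objects and the
 // base pointer (or SP) for locals; otherwise prefer FP when it is in range or
 // when SP is unreliable, and finally fall back to the base pointer or SP.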
900 
901  // When dynamically realigning the stack, use the frame pointer for
902  // parameters, and the stack/base pointer for locals.
903  if (RegInfo->needsStackRealignment(MF)) {
904  assert(hasFP(MF) && "dynamic stack realignment without a FP!");
905  if (isFixed) {
906  FrameReg = RegInfo->getFrameRegister(MF);
907  Offset = FPOffset;
908  } else if (hasMovingSP) {
909  assert(RegInfo->hasBasePointer(MF) &&
910  "VLAs and dynamic stack alignment, but missing base pointer!");
911  FrameReg = RegInfo->getBaseRegister();
912  Offset -= SPAdj;
913  }
914  return Offset;
915  }
916 
917  // If there is a frame pointer, use it when we can.
918  if (hasFP(MF) && AFI->hasStackFrame()) {
919  // Use frame pointer to reference fixed objects. Use it for locals if
920  // there are VLAs (and thus the SP isn't reliable as a base).
921  if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
922  FrameReg = RegInfo->getFrameRegister(MF);
923  return FPOffset;
924  } else if (hasMovingSP) {
925  assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
926  if (AFI->isThumb2Function()) {
927  // Try to use the frame pointer if we can, else use the base pointer
928  // since it's available. This is handy for the emergency spill slot, in
929  // particular.
930  if (FPOffset >= -255 && FPOffset < 0) {
931  FrameReg = RegInfo->getFrameRegister(MF);
932  return FPOffset;
933  }
934  }
935  } else if (AFI->isThumbFunction()) {
936  // Prefer SP to base pointer, if the offset is suitably aligned and in
937  // range as the effective range of the immediate offset is bigger when
938  // basing off SP.
939  // Use add <rd>, sp, #<imm8>
940  // ldr <rd>, [sp, #<imm8>]
941  if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
942  return Offset;
943  // In Thumb2 mode, the negative offset is very limited. Try to avoid
944  // out of range references. ldr <rt>,[<rn>, #-<imm8>]
945  if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
946  FrameReg = RegInfo->getFrameRegister(MF);
947  return FPOffset;
948  }
949  } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
950  // Otherwise, use SP or FP, whichever is closer to the stack slot.
951  FrameReg = RegInfo->getFrameRegister(MF);
952  return FPOffset;
953  }
954  }
955  // Use the base pointer if we have one.
956  if (RegInfo->hasBasePointer(MF))
957  FrameReg = RegInfo->getBaseRegister();
958  return Offset;
959 }
960 
961 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
962  MachineBasicBlock::iterator MI,
963  const std::vector<CalleeSavedInfo> &CSI,
964  unsigned StmOpc, unsigned StrOpc,
965  bool NoGap,
966  bool(*Func)(unsigned, bool),
967  unsigned NumAlignedDPRCS2Regs,
968  unsigned MIFlags) const {
969  MachineFunction &MF = *MBB.getParent();
970  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
971  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
972 
973  DebugLoc DL;
974 
975  using RegAndKill = std::pair<unsigned, bool>;
976 
977  SmallVector<RegAndKill, 4> Regs;
978  unsigned i = CSI.size();
979  while (i != 0) {
980  unsigned LastReg = 0;
981  for (; i != 0; --i) {
982  unsigned Reg = CSI[i-1].getReg();
983  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
984 
985  // D-registers in the aligned area DPRCS2 are NOT spilled here.
986  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
987  continue;
988 
989  const MachineRegisterInfo &MRI = MF.getRegInfo();
990  bool isLiveIn = MRI.isLiveIn(Reg);
991  if (!isLiveIn && !MRI.isReserved(Reg))
992  MBB.addLiveIn(Reg);
993  // If NoGap is true, push consecutive registers and then leave the rest
994  // for other instructions. e.g.
995  // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
996  if (NoGap && LastReg && LastReg != Reg-1)
997  break;
998  LastReg = Reg;
999  // Do not set a kill flag on values that are also marked as live-in. This
1000  // happens with the @llvm.returnaddress intrinsic and with arguments
1001  // passed in callee saved registers.
1002  // Omitting the kill flags is conservatively correct even if the live-in
1003  // is not used after all.
1004  Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1005  }
1006 
1007  if (Regs.empty())
1008  continue;
1009 
1010  llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1011  return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1012  });
1013 
1014  if (Regs.size() > 1 || StrOpc== 0) {
1015  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1016  .addReg(ARM::SP)
1017  .setMIFlags(MIFlags)
1018  .add(predOps(ARMCC::AL));
1019  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1020  MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1021  } else if (Regs.size() == 1) {
1022  BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1023  .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1024  .addReg(ARM::SP)
1025  .setMIFlags(MIFlags)
1026  .addImm(-4)
1027  .add(predOps(ARMCC::AL));
1028  }
1029  Regs.clear();
1030 
1031  // Put any subsequent vpush instructions before this one: they will refer to
1032  // higher register numbers so need to be pushed first in order to preserve
1033  // monotonicity.
1034  if (MI != MBB.begin())
1035  --MI;
1036  }
1037 }
1038 
1039 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1040  MachineBasicBlock::iterator MI,
1041  std::vector<CalleeSavedInfo> &CSI,
1042  unsigned LdmOpc, unsigned LdrOpc,
1043  bool isVarArg, bool NoGap,
1044  bool(*Func)(unsigned, bool),
1045  unsigned NumAlignedDPRCS2Regs) const {
1046  MachineFunction &MF = *MBB.getParent();
1047  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1048  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
1049  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1050  DebugLoc DL;
1051  bool isTailCall = false;
1052  bool isInterrupt = false;
1053  bool isTrap = false;
1054  if (MBB.end() != MI) {
1055  DL = MI->getDebugLoc();
1056  unsigned RetOpcode = MI->getOpcode();
1057  isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1058  isInterrupt =
1059  RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1060  isTrap =
1061  RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1062  RetOpcode == ARM::tTRAP;
1063  }
1064 
1065  SmallVector<unsigned, 4> Regs;
1066  unsigned i = CSI.size();
1067  while (i != 0) {
1068  unsigned LastReg = 0;
1069  bool DeleteRet = false;
1070  for (; i != 0; --i) {
1071  CalleeSavedInfo &Info = CSI[i-1];
1072  unsigned Reg = Info.getReg();
1073  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1074 
1075  // The aligned reloads from area DPRCS2 are not inserted here.
1076  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1077  continue;
1078 
1079  if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1080  !isTrap && STI.hasV5TOps()) {
1081  if (MBB.succ_empty()) {
1082  Reg = ARM::PC;
1083  // Fold the return instruction into the LDM.
1084  DeleteRet = true;
1085  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1086  // We 'restore' LR into PC so it is not live out of the return block:
1087  // Clear Restored bit.
1088  Info.setRestored(false);
1089  } else
1090  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1091  }
1092 
1093  // If NoGap is true, pop consecutive registers and then leave the rest
1094  // for other instructions. e.g.
1095  // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1096  if (NoGap && LastReg && LastReg != Reg-1)
1097  break;
1098 
1099  LastReg = Reg;
1100  Regs.push_back(Reg);
1101  }
1102 
1103  if (Regs.empty())
1104  continue;
1105 
1106  llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1107  return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1108  });
1109 
1110  if (Regs.size() > 1 || LdrOpc == 0) {
1111  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1112  .addReg(ARM::SP)
1113  .add(predOps(ARMCC::AL));
1114  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1115  MIB.addReg(Regs[i], getDefRegState(true));
1116  if (DeleteRet) {
1117  if (MI != MBB.end()) {
1118  MIB.copyImplicitOps(*MI);
1119  MI->eraseFromParent();
1120  }
1121  }
1122  MI = MIB;
1123  } else if (Regs.size() == 1) {
1124  // If we adjusted the reg to PC from LR above, switch it back here. We
1125  // only do that for LDM.
1126  if (Regs[0] == ARM::PC)
1127  Regs[0] = ARM::LR;
1128  MachineInstrBuilder MIB =
1129  BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1130  .addReg(ARM::SP, RegState::Define)
1131  .addReg(ARM::SP);
1132  // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1133  // that refactoring is complete (eventually).
1134  if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1135  MIB.addReg(0);
1137  } else
1138  MIB.addImm(4);
1139  MIB.add(predOps(ARMCC::AL));
1140  }
1141  Regs.clear();
1142 
1143  // Put any subsequent vpop instructions after this one: they will refer to
1144  // higher register numbers so need to be popped afterwards.
1145  if (MI != MBB.end())
1146  ++MI;
1147  }
1148 }
1149 
1150 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1151 /// starting from d8. Also insert stack realignment code and leave the stack
1152 /// pointer pointing to the d8 spill slot.
1153 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1154  MachineBasicBlock::iterator MI,
1155  unsigned NumAlignedDPRCS2Regs,
1156  const std::vector<CalleeSavedInfo> &CSI,
1157  const TargetRegisterInfo *TRI) {
1158  MachineFunction &MF = *MBB.getParent();
1159  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1160  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1161  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1162  MachineFrameInfo &MFI = MF.getFrameInfo();
1163 
1164  // Mark the D-register spill slots as properly aligned. Since MFI computes
1165  // stack slot layout backwards, this can actually mean that the d-reg stack
1166  // slot offsets can be wrong. The offset for d8 will always be correct.
1167  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1168  unsigned DNum = CSI[i].getReg() - ARM::D8;
1169  if (DNum > NumAlignedDPRCS2Regs - 1)
1170  continue;
1171  int FI = CSI[i].getFrameIdx();
1172  // The even-numbered registers will be 16-byte aligned, the odd-numbered
1173  // registers will be 8-byte aligned.
1174  MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
1175 
1176  // The stack slot for D8 needs to be maximally aligned because this is
1177  // actually the point where we align the stack pointer. MachineFrameInfo
1178  // computes all offsets relative to the incoming stack pointer which is a
1179  // bit weird when realigning the stack. Any extra padding for this
1180  // over-alignment is not realized because the code inserted below adjusts
1181  // the stack pointer by numregs * 8 before aligning the stack pointer.
1182  if (DNum == 0)
1183  MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
1184  }
1185 
1186  // Move the stack pointer to the d8 spill slot, and align it at the same
1187  // time. Leave the stack slot address in the scratch register r4.
1188  //
1189  // sub r4, sp, #numregs * 8
1190  // bic r4, r4, #align - 1
1191  // mov sp, r4
1192  //
1193  bool isThumb = AFI->isThumbFunction();
1194  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1195  AFI->setShouldRestoreSPFromFP(true);
1196 
1197  // sub r4, sp, #numregs * 8
1198  // The immediate is <= 64, so it doesn't need any special encoding.
1199  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1200  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1201  .addReg(ARM::SP)
1202  .addImm(8 * NumAlignedDPRCS2Regs)
1203  .add(predOps(ARMCC::AL))
1204  .add(condCodeOp());
1205 
1206  unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
1207  // We must set parameter MustBeSingleInstruction to true, since
1208  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1209  // stack alignment. Luckily, this can always be done since all ARM
1210  // architecture versions that support Neon also support the BFC
1211  // instruction.
1212  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1213 
1214  // mov sp, r4
1215  // The stack pointer must be adjusted before spilling anything, otherwise
1216  // the stack slots could be clobbered by an interrupt handler.
1217  // Leave r4 live, it is used below.
1218  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1219  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1220  .addReg(ARM::R4)
1221  .add(predOps(ARMCC::AL));
1222  if (!isThumb)
1223  MIB.add(condCodeOp());
1224 
1225  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1226  // r4 holds the stack slot address.
1227  unsigned NextReg = ARM::D8;
1228 
1229  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1230  // The writeback is only needed when emitting two vst1.64 instructions.
1231  if (NumAlignedDPRCS2Regs >= 6) {
1232  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1233  &ARM::QQPRRegClass);
1234  MBB.addLiveIn(SupReg);
1235  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1236  .addReg(ARM::R4, RegState::Kill)
1237  .addImm(16)
1238  .addReg(NextReg)
1239  .addReg(SupReg, RegState::ImplicitKill)
1240  .add(predOps(ARMCC::AL));
1241  NextReg += 4;
1242  NumAlignedDPRCS2Regs -= 4;
1243  }
1244 
1245  // We won't modify r4 beyond this point. It currently points to the next
1246  // register to be spilled.
1247  unsigned R4BaseReg = NextReg;
1248 
1249  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1250  if (NumAlignedDPRCS2Regs >= 4) {
1251  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1252  &ARM::QQPRRegClass);
1253  MBB.addLiveIn(SupReg);
1254  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1255  .addReg(ARM::R4)
1256  .addImm(16)
1257  .addReg(NextReg)
1258  .addReg(SupReg, RegState::ImplicitKill)
1259  .add(predOps(ARMCC::AL));
1260  NextReg += 4;
1261  NumAlignedDPRCS2Regs -= 4;
1262  }
1263 
1264  // 16-byte aligned vst1.64 with 2 d-regs.
1265  if (NumAlignedDPRCS2Regs >= 2) {
1266  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1267  &ARM::QPRRegClass);
1268  MBB.addLiveIn(SupReg);
1269  BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1270  .addReg(ARM::R4)
1271  .addImm(16)
1272  .addReg(SupReg)
1273  .add(predOps(ARMCC::AL));
1274  NextReg += 2;
1275  NumAlignedDPRCS2Regs -= 2;
1276  }
1277 
1278  // Finally, use a vanilla vstr.64 for the odd last register.
1279  if (NumAlignedDPRCS2Regs) {
1280  MBB.addLiveIn(NextReg);
1281  // vstr.64 uses addrmode5 which has an offset scale of 4.
1282  BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1283  .addReg(NextReg)
1284  .addReg(ARM::R4)
1285  .addImm((NextReg - R4BaseReg) * 2)
1286  .add(predOps(ARMCC::AL));
1287  }
1288 
1289  // The last spill instruction inserted should kill the scratch register r4.
1290  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1291 }
1292 
1293 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1294 /// iterator to the following instruction.
1295 static MachineBasicBlock::iterator
1296 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1297  unsigned NumAlignedDPRCS2Regs) {
1298  // sub r4, sp, #numregs * 8
1299  // bic r4, r4, #align - 1
1300  // mov sp, r4
1301  ++MI; ++MI; ++MI;
1302  assert(MI->mayStore() && "Expecting spill instruction");
1303 
1304  // These switches all fall through.
1305  switch(NumAlignedDPRCS2Regs) {
1306  case 7:
1307  ++MI;
1308  assert(MI->mayStore() && "Expecting spill instruction");
1309  LLVM_FALLTHROUGH;
1310  default:
1311  ++MI;
1312  assert(MI->mayStore() && "Expecting spill instruction");
1313  LLVM_FALLTHROUGH;
1314  case 1:
1315  case 2:
1316  case 4:
1317  assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1318  ++MI;
1319  }
1320  return MI;
1321 }
1322 
1323 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1324 /// starting from d8. These instructions are assumed to execute while the
1325 /// stack is still aligned, unlike the code inserted by emitPopInst.
1326 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1327  MachineBasicBlock::iterator MI,
1328  unsigned NumAlignedDPRCS2Regs,
1329  const std::vector<CalleeSavedInfo> &CSI,
1330  const TargetRegisterInfo *TRI) {
1331  MachineFunction &MF = *MBB.getParent();
1332  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1333  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1334  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1335 
1336  // Find the frame index assigned to d8.
1337  int D8SpillFI = 0;
1338  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1339  if (CSI[i].getReg() == ARM::D8) {
1340  D8SpillFI = CSI[i].getFrameIdx();
1341  break;
1342  }
1343 
1344  // Materialize the address of the d8 spill slot into the scratch register r4.
1345  // This can be fairly complicated if the stack frame is large, so just use
1346  // the normal frame index elimination mechanism to do it. This code runs as
1347  // the initial part of the epilog where the stack and base pointers haven't
1348  // been changed yet.
1349  bool isThumb = AFI->isThumbFunction();
1350  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1351 
1352  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1353  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1354  .addFrameIndex(D8SpillFI)
1355  .addImm(0)
1356  .add(predOps(ARMCC::AL))
1357  .add(condCodeOp());
1358 
1359  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1360  unsigned NextReg = ARM::D8;
1361 
1362  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1363  if (NumAlignedDPRCS2Regs >= 6) {
1364  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1365  &ARM::QQPRRegClass);
1366  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1367  .addReg(ARM::R4, RegState::Define)
1369  .addImm(16)
1371  .add(predOps(ARMCC::AL));
1372  NextReg += 4;
1373  NumAlignedDPRCS2Regs -= 4;
1374  }
1375 
1376  // We won't modify r4 beyond this point. It currently points to the next
1377  // register to be spilled.
1378  unsigned R4BaseReg = NextReg;
1379 
1380  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1381  if (NumAlignedDPRCS2Regs >= 4) {
1382  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1383  &ARM::QQPRRegClass);
1384  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1385  .addReg(ARM::R4)
1386  .addImm(16)
1388  .add(predOps(ARMCC::AL));
1389  NextReg += 4;
1390  NumAlignedDPRCS2Regs -= 4;
1391  }
1392 
1393  // 16-byte aligned vld1.64 with 2 d-regs.
1394  if (NumAlignedDPRCS2Regs >= 2) {
1395  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1396  &ARM::QPRRegClass);
1397  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1398  .addReg(ARM::R4)
1399  .addImm(16)
1400  .add(predOps(ARMCC::AL));
1401  NextReg += 2;
1402  NumAlignedDPRCS2Regs -= 2;
1403  }
1404 
1405  // Finally, use a vanilla vldr.64 for the remaining odd register.
1406  if (NumAlignedDPRCS2Regs)
1407  BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1408  .addReg(ARM::R4)
1409  .addImm(2 * (NextReg - R4BaseReg))
1410  .add(predOps(ARMCC::AL));
1411 
1412  // The last reload inserted above kills r4.
1413  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1414 }
1415 
1416 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
1417  MachineBasicBlock::iterator MI,
1418  const std::vector<CalleeSavedInfo> &CSI,
1419  const TargetRegisterInfo *TRI) const {
1420  if (CSI.empty())
1421  return false;
1422 
1423  MachineFunction &MF = *MBB.getParent();
1424  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1425 
1426  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1427  unsigned PushOneOpc = AFI->isThumbFunction() ?
1428  ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1429  unsigned FltOpc = ARM::VSTMDDB_UPD;
1430  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1431  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1432  MachineInstr::FrameSetup);
1433  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1434  MachineInstr::FrameSetup);
1435  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1436  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1437 
1438  // The code above does not insert spill code for the aligned DPRCS2 registers.
1439  // The stack realignment code will be inserted between the push instructions
1440  // and these spills.
1441  if (NumAlignedDPRCS2Regs)
1442  emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1443 
1444  return true;
1445 }
1446 
1447 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1448  MachineBasicBlock::iterator MI,
1449  std::vector<CalleeSavedInfo> &CSI,
1450  const TargetRegisterInfo *TRI) const {
1451  if (CSI.empty())
1452  return false;
1453 
1454  MachineFunction &MF = *MBB.getParent();
1455  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1456  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1457  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1458 
1459  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1460  // registers. Do that here instead.
1461  if (NumAlignedDPRCS2Regs)
1462  emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1463 
1464  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1465  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1466  unsigned FltOpc = ARM::VLDMDIA_UPD;
1467  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1468  NumAlignedDPRCS2Regs);
1469  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1470  &isARMArea2Register, 0);
1471  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1472  &isARMArea1Register, 0);
1473 
1474  return true;
1475 }
1476 
1477 // FIXME: Make generic?
1478 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
1479  const ARMBaseInstrInfo &TII) {
1480  unsigned FnSize = 0;
1481  for (auto &MBB : MF) {
1482  for (auto &MI : MBB)
1483  FnSize += TII.getInstSizeInBytes(MI);
1484  }
1485  return FnSize;
1486 }
1487 
1488 /// estimateRSStackSizeLimit - Look at each instruction that references stack
1489 /// frames and return the stack size limit beyond which some of these
1490 /// instructions will require a scratch register during their expansion later.
1491 // FIXME: Move to TII?
1492 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
1493  const TargetFrameLowering *TFI) {
1494  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1495  unsigned Limit = (1 << 12) - 1;
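 // Start from the ARM-mode imm12 limit (4095 bytes) and tighten it according
 // to the addressing modes actually used on frame-index operands below.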
1496  for (auto &MBB : MF) {
1497  for (auto &MI : MBB) {
1498  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1499  if (!MI.getOperand(i).isFI())
1500  continue;
1501 
1502  // When using ADDri to get the address of a stack object, 255 is the
1503  // largest offset guaranteed to fit in the immediate offset.
1504  if (MI.getOpcode() == ARM::ADDri) {
1505  Limit = std::min(Limit, (1U << 8) - 1);
1506  break;
1507  }
1508 
1509  // Otherwise check the addressing mode.
1510  switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
1511  case ARMII::AddrMode3:
1512  case ARMII::AddrModeT2_i8:
1513  Limit = std::min(Limit, (1U << 8) - 1);
1514  break;
1515  case ARMII::AddrMode5:
1518  Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1519  break;
1520  case ARMII::AddrModeT2_i12:
1521  // i12 supports only positive offset so these will be converted to
1522  // i8 opcodes. See llvm::rewriteT2FrameIndex.
1523  if (TFI->hasFP(MF) && AFI->hasStackFrame())
1524  Limit = std::min(Limit, (1U << 8) - 1);
1525  break;
1526  case ARMII::AddrMode4:
1527  case ARMII::AddrMode6:
1528  // Addressing modes 4 & 6 (load/store) instructions can't encode an
1529  // immediate offset for stack references.
1530  return 0;
1531  default:
1532  break;
1533  }
1534  break; // At most one FI per instruction
1535  }
1536  }
1537  }
1538 
1539  return Limit;
1540 }
1541 
1542 // In functions that realign the stack, it can be an advantage to spill the
1543 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1544 // instructions take alignment hints that can improve performance.
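 // e.g. with 16-byte aligned spill slots the aligned spill code emits
 // vst1.64 {d8-d11}, [r4:128] style accesses (the addImm(16) operands on the
 // VST1/VLD1 instructions are that alignment hint in bytes).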
1545 static void
1546 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
1547  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1548  if (!SpillAlignedNEONRegs)
1549  return;
1550 
1551  // Naked functions don't spill callee-saved registers.
1552  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
1553  return;
1554 
1555  // We are planning to use NEON instructions vst1 / vld1.
1556  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
1557  return;
1558 
1559  // Don't bother if the default stack alignment is sufficiently high.
1560  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
1561  return;
1562 
1563  // Aligned spills require stack realignment.
1564  if (!static_cast<const ARMBaseRegisterInfo *>(
1565  MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
1566  return;
1567 
1568  // We always spill contiguous d-registers starting from d8. Count how many
1569  // need spilling. The register allocator will almost always use the
1570  // callee-saved registers in order, but it can happen that there are holes in
1571  // the range. Registers above the hole will be spilled to the standard DPRCS
1572  // area.
1573  unsigned NumSpills = 0;
1574  for (; NumSpills < 8; ++NumSpills)
1575  if (!SavedRegs.test(ARM::D8 + NumSpills))
1576  break;
1577 
1578  // Don't do this for just one d-register. It's not worth it.
1579  if (NumSpills < 2)
1580  return;
1581 
1582  // Spill the first NumSpills D-registers after realigning the stack.
1583  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1584 
1585  // A scratch register is required for the vst1 / vld1 instructions.
1586  SavedRegs.set(ARM::R4);
1587 }
1588 
1589 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
1590  BitVector &SavedRegs,
1591  RegScavenger *RS) const {
1592  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1593  // This tells PEI to spill the FP as if it is any other callee-save register
1594  // to take advantage of the eliminateFrameIndex machinery. This also ensures it
1595  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1596  // to combine multiple loads / stores.
1597  bool CanEliminateFrame = true;
1598  bool CS1Spilled = false;
1599  bool LRSpilled = false;
1600  unsigned NumGPRSpills = 0;
1601  unsigned NumFPRSpills = 0;
1602  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1603  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1604  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1605  MF.getSubtarget().getRegisterInfo());
1606  const ARMBaseInstrInfo &TII =
1607  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1608  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1609  MachineFrameInfo &MFI = MF.getFrameInfo();
1611  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1612  (void)TRI; // Silence unused warning in non-assert builds.
1613  unsigned FramePtr = RegInfo->getFrameRegister(MF);
1614 
1615  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
1616  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
1617  // since it's not always possible to restore sp from fp in a single
1618  // instruction.
1619  // FIXME: It would be better just to find a spare register here.
1620  if (AFI->isThumb2Function() &&
1621  (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1622  SavedRegs.set(ARM::R4);
1623 
1624  // If a stack probe will be emitted, spill R4 and LR, since they are
1625  // clobbered by the stack probe call.
1626  // This estimate should be a safe, conservative estimate. The actual
1627  // stack probe is enabled based on the size of the local objects;
1628  // this estimate also includes the varargs store size.
1629  if (STI.isTargetWindows() &&
1630  WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
1631  SavedRegs.set(ARM::R4);
1632  SavedRegs.set(ARM::LR);
1633  }
1634 
1635  if (AFI->isThumb1OnlyFunction()) {
1636  // Spill LR if Thumb1 function uses variable length argument lists.
1637  if (AFI->getArgRegsSaveSize() > 0)
1638  SavedRegs.set(ARM::LR);
1639 
1640  // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
1641  // requires stack alignment. We don't know for sure what the stack size
1642  // will be, but for this, an estimate is good enough. If anything changes
1643  // it, it'll be a spill, which implies we've used all the registers and so
1644  // R4 is already used, so not marking it here will be OK.
1645  // FIXME: It would be better just to find a spare register here.
1646  if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
1647  MFI.estimateStackSize(MF) > 508)
1648  SavedRegs.set(ARM::R4);
1649  }
1650 
1651  // See if we can spill vector registers to aligned stack.
1652  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
1653 
1654  // Spill the BasePtr if it's used.
1655  if (RegInfo->hasBasePointer(MF))
1656  SavedRegs.set(RegInfo->getBaseRegister());
1657 
1658  // Don't spill FP if the frame can be eliminated. This is determined
1659  // by scanning the callee-save registers to see if any is modified.
1660  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1661  for (unsigned i = 0; CSRegs[i]; ++i) {
1662  unsigned Reg = CSRegs[i];
1663  bool Spilled = false;
1664  if (SavedRegs.test(Reg)) {
1665  Spilled = true;
1666  CanEliminateFrame = false;
1667  }
1668 
1669  if (!ARM::GPRRegClass.contains(Reg)) {
1670  if (Spilled) {
1671  if (ARM::SPRRegClass.contains(Reg))
1672  NumFPRSpills++;
1673  else if (ARM::DPRRegClass.contains(Reg))
1674  NumFPRSpills += 2;
1675  else if (ARM::QPRRegClass.contains(Reg))
1676  NumFPRSpills += 4;
1677  }
1678  continue;
1679  }
1680 
1681  if (Spilled) {
1682  NumGPRSpills++;
1683 
1684  if (!STI.splitFramePushPop(MF)) {
1685  if (Reg == ARM::LR)
1686  LRSpilled = true;
1687  CS1Spilled = true;
1688  continue;
1689  }
1690 
1691  // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
1692  switch (Reg) {
1693  case ARM::LR:
1694  LRSpilled = true;
1695  LLVM_FALLTHROUGH;
1696  case ARM::R0: case ARM::R1:
1697  case ARM::R2: case ARM::R3:
1698  case ARM::R4: case ARM::R5:
1699  case ARM::R6: case ARM::R7:
1700  CS1Spilled = true;
1701  break;
1702  default:
1703  break;
1704  }
1705  } else {
1706  if (!STI.splitFramePushPop(MF)) {
1707  UnspilledCS1GPRs.push_back(Reg);
1708  continue;
1709  }
1710 
1711  switch (Reg) {
1712  case ARM::R0: case ARM::R1:
1713  case ARM::R2: case ARM::R3:
1714  case ARM::R4: case ARM::R5:
1715  case ARM::R6: case ARM::R7:
1716  case ARM::LR:
1717  UnspilledCS1GPRs.push_back(Reg);
1718  break;
1719  default:
1720  UnspilledCS2GPRs.push_back(Reg);
1721  break;
1722  }
1723  }
1724  }
1725 
1726  bool ForceLRSpill = false;
1727  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1728  unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
1729  // Force LR to be spilled if the Thumb function size is > 2048. This enables
1730  // use of BL to implement far jump. If it turns out that it's not needed
1731  // then the branch fix up path will undo it.
1732  if (FnSize >= (1 << 11)) {
1733  CanEliminateFrame = false;
1734  ForceLRSpill = true;
1735  }
1736  }
1737 
1738  // If any of the stack slot references may be out of range of an immediate
1739  // offset, make sure a register (or a spill slot) is available for the
1740  // register scavenger. Note that if we're indexing off the frame pointer, the
1741  // effective stack size is 4 bytes larger since the FP points to the stack
1742  // slot of the previous FP. Also, if we have variable sized objects in the
1743  // function, stack slot references will often be negative, and some of
1744  // our instructions are positive-offset only, so conservatively consider
1745  // that case to want a spill slot (or register) as well. Similarly, if
1746  // the function adjusts the stack pointer during execution and the
1747  // adjustments aren't already part of our stack size estimate, our offset
1748  // calculations may be off, so be conservative.
1749  // FIXME: We could add logic to be more precise about negative offsets
1750  // and which instructions will need a scratch register for them. Is it
1751  // worth the effort and added fragility?
1752  unsigned EstimatedStackSize =
1753  MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
1754 
1755  // Determine biggest (positive) SP offset in MachineFrameInfo.
1756  int MaxFixedOffset = 0;
1757  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
1758  int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
1759  MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
1760  }
1761 
1762  bool HasFP = hasFP(MF);
1763  if (HasFP) {
1764  if (AFI->hasStackFrame())
1765  EstimatedStackSize += 4;
1766  } else {
1767  // If FP is not used, SP will be used to access arguments, so count the
1768  // size of arguments into the estimation.
1769  EstimatedStackSize += MaxFixedOffset;
1770  }
1771  EstimatedStackSize += 16; // For possible paddings.
1772 
1773  unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
1774  int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
1775  bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
1776  MFI.hasVarSizedObjects() ||
1777  (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
1778  // For large argument stacks fp relative addressing may overflow.
1779  (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
1780  if (BigFrameOffsets ||
1781  !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1782  AFI->setHasStackFrame(true);
1783 
1784  if (HasFP) {
1785  SavedRegs.set(FramePtr);
1786  // If the frame pointer is required by the ABI, also spill LR so that we
1787  // emit a complete frame record.
1788  if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
1789  SavedRegs.set(ARM::LR);
1790  LRSpilled = true;
1791  NumGPRSpills++;
1792  auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
1793  if (LRPos != UnspilledCS1GPRs.end())
1794  UnspilledCS1GPRs.erase(LRPos);
1795  }
1796  auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
1797  if (FPPos != UnspilledCS1GPRs.end())
1798  UnspilledCS1GPRs.erase(FPPos);
1799  NumGPRSpills++;
1800  if (FramePtr == ARM::R7)
1801  CS1Spilled = true;
1802  }
1803 
1804  // This is true when we inserted a spill for an unused register that can now
1805  // be used for register scavenging.
1806  bool ExtraCSSpill = false;
1807 
1808  if (AFI->isThumb1OnlyFunction()) {
1809  // For Thumb1-only targets, we need some low registers when we save and
1810  // restore the high registers (which aren't allocatable, but could be
1811  // used by inline assembly) because the push/pop instructions cannot
1812  // access high registers. If necessary, we might need to push more low
1813  // registers to ensure that there is at least one free that can be used
1814  // for the saving & restoring, and preferably we should ensure that as
1815  // many as are needed are available so that fewer push/pop instructions
1816  // are required.
1817 
1818  // Low registers which are not currently pushed, but could be (r4-r7).
1819  SmallVector<unsigned, 4> AvailableRegs;
1820 
1821  // Unused argument registers (r0-r3) can be clobbered in the prologue for
1822  // free.
1823  int EntryRegDeficit = 0;
1824  for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
1825  if (!MF.getRegInfo().isLiveIn(Reg)) {
1826  --EntryRegDeficit;
1827  LLVM_DEBUG(dbgs()
1828  << printReg(Reg, TRI)
1829  << " is unused argument register, EntryRegDeficit = "
1830  << EntryRegDeficit << "\n");
1831  }
1832  }
1833 
1834  // Unused return registers can be clobbered in the epilogue for free.
1835  int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
1836  LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
1837  << " return regs used, ExitRegDeficit = "
1838  << ExitRegDeficit << "\n");
1839 
1840  int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
1841  LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
1842 
1843  // r4-r6 can be used in the prologue if they are pushed by the first push
1844  // instruction.
1845  for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
1846  if (SavedRegs.test(Reg)) {
1847  --RegDeficit;
1848  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1849  << " is saved low register, RegDeficit = "
1850  << RegDeficit << "\n");
1851  } else {
1852  AvailableRegs.push_back(Reg);
1853  LLVM_DEBUG(
1854  dbgs()
1855  << printReg(Reg, TRI)
1856  << " is non-saved low register, adding to AvailableRegs\n");
1857  }
1858  }
1859 
1860  // r7 can be used if it is not being used as the frame pointer.
1861  if (!HasFP) {
1862  if (SavedRegs.test(ARM::R7)) {
1863  --RegDeficit;
1864  LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
1865  << RegDeficit << "\n");
1866  } else {
1867  AvailableRegs.push_back(ARM::R7);
1868  LLVM_DEBUG(
1869  dbgs()
1870  << "%r7 is non-saved low register, adding to AvailableRegs\n");
1871  }
1872  }
1873 
1874  // Each of r8-r11 needs to be copied to a low register, then pushed.
1875  for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
1876  if (SavedRegs.test(Reg)) {
1877  ++RegDeficit;
1878  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1879  << " is saved high register, RegDeficit = "
1880  << RegDeficit << "\n");
1881  }
1882  }
1883 
1884  // LR can only be used by PUSH, not POP, and can't be used at all if the
1885  // llvm.returnaddress intrinsic is used. This is only worth doing if we
1886  // are more limited at function entry than exit.
1887  if ((EntryRegDeficit > ExitRegDeficit) &&
1888  !(MF.getRegInfo().isLiveIn(ARM::LR) &&
1889  MF.getFrameInfo().isReturnAddressTaken())) {
1890  if (SavedRegs.test(ARM::LR)) {
1891  --RegDeficit;
1892  LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
1893  << RegDeficit << "\n");
1894  } else {
1895  AvailableRegs.push_back(ARM::LR);
1896  LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
1897  }
1898  }
1899 
1900  // If there are more high registers that need pushing than low registers
1901  // available, push some more low registers so that we can use fewer push
1902  // instructions. This might not reduce RegDeficit all the way to zero,
1903  // because we can only guarantee that r4-r6 are available, but r8-r11 may
1904  // need saving.
1905  LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
1906  for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
1907  unsigned Reg = AvailableRegs.pop_back_val();
1908  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1909  << " to make up reg deficit\n");
1910  SavedRegs.set(Reg);
1911  NumGPRSpills++;
1912  CS1Spilled = true;
1913  assert(!MRI.isReserved(Reg) && "Should not be reserved");
1914  if (!MRI.isPhysRegUsed(Reg))
1915  ExtraCSSpill = true;
1916  UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
1917  if (Reg == ARM::LR)
1918  LRSpilled = true;
1919  }
1920  LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
1921  << "\n");
1922  }
1923 
1924  // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
1925  // restore LR in that case.
1926  bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
1927 
1928  // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
1929  // spill LR as well so we can fold BX_RET into the register restore (LDM).
1930  if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
1931  SavedRegs.set(ARM::LR);
1932  NumGPRSpills++;
1933  SmallVectorImpl<unsigned>::iterator
1934  LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
1935  if (LRPos != UnspilledCS1GPRs.end())
1936  UnspilledCS1GPRs.erase(LRPos);
1937 
1938  ForceLRSpill = false;
1939  if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR))
1940  ExtraCSSpill = true;
1941  }
1942 
1943  // If stack and double are 8-byte aligned and we are spilling an odd number
1944  // of GPRs, spill one extra callee save GPR so we won't have to pad between
1945  // the integer and double callee save areas.
1946  LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
1947  unsigned TargetAlign = getStackAlignment();
1948  if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
1949  if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
1950  for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
1951  unsigned Reg = UnspilledCS1GPRs[i];
1952  // Don't spill a high register if the function is Thumb. In the case of
1953  // Windows on ARM, accept R11 (the frame pointer).
1954  if (!AFI->isThumbFunction() ||
1955  (STI.isTargetWindows() && Reg == ARM::R11) ||
1956  isARMLowRegister(Reg) ||
1957  (Reg == ARM::LR && !ExpensiveLRRestore)) {
1958  SavedRegs.set(Reg);
1959  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1960  << " to make up alignment\n");
1961  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1962  ExtraCSSpill = true;
1963  break;
1964  }
1965  }
1966  } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
1967  unsigned Reg = UnspilledCS2GPRs.front();
1968  SavedRegs.set(Reg);
1969  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1970  << " to make up alignment\n");
1971  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1972  ExtraCSSpill = true;
1973  }
1974  }
1975 
1976  // Estimate if we might need to scavenge a register at some point in order
1977  // to materialize a stack offset. If so, either spill one additional
1978  // callee-saved register or reserve a special spill slot to facilitate
1979  // register scavenging. Thumb1 needs a spill slot for stack pointer
1980  // adjustments also, even when the frame itself is small.
1981  if (BigFrameOffsets && !ExtraCSSpill) {
1982  // If any non-reserved CS register isn't spilled, just spill one or two
1983  // extra. That should take care of it!
1984  unsigned NumExtras = TargetAlign / 4;
1985  SmallVector<unsigned, 2> Extras;
1986  while (NumExtras && !UnspilledCS1GPRs.empty()) {
1987  unsigned Reg = UnspilledCS1GPRs.back();
1988  UnspilledCS1GPRs.pop_back();
1989  if (!MRI.isReserved(Reg) &&
1990  (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
1991  Reg == ARM::LR)) {
1992  Extras.push_back(Reg);
1993  NumExtras--;
1994  }
1995  }
1996  // For non-Thumb1 functions, also check for hi-reg CS registers
1997  if (!AFI->isThumb1OnlyFunction()) {
1998  while (NumExtras && !UnspilledCS2GPRs.empty()) {
1999  unsigned Reg = UnspilledCS2GPRs.back();
2000  UnspilledCS2GPRs.pop_back();
2001  if (!MRI.isReserved(Reg)) {
2002  Extras.push_back(Reg);
2003  NumExtras--;
2004  }
2005  }
2006  }
2007  if (NumExtras == 0) {
2008  for (unsigned Reg : Extras) {
2009  SavedRegs.set(Reg);
2010  if (!MRI.isPhysRegUsed(Reg))
2011  ExtraCSSpill = true;
2012  }
2013  }
2014  if (!ExtraCSSpill && !AFI->isThumb1OnlyFunction()) {
2015  // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
2016  // closest to SP or frame pointer.
2017  assert(RS && "Register scavenging not provided");
2018  const TargetRegisterClass &RC = ARM::GPRRegClass;
2019  unsigned Size = TRI->getSpillSize(RC);
2020  unsigned Align = TRI->getSpillAlignment(RC);
2021  RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2022  }
2023  }
2024  }
2025 
2026  if (ForceLRSpill) {
2027  SavedRegs.set(ARM::LR);
2028  AFI->setLRIsSpilledForFarJump(true);
2029  }
2030 }
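
A minimal standalone sketch (illustrative only, with made-up numbers) of the estimate that drives the scavenging-slot decision above: the callee-save spills and a padding allowance are added to the frame estimate and compared against the addressing-mode offset limit.

#include <cassert>

int main() {
  unsigned EstimatedStackSize = 900;              // hypothetical frame estimate
  unsigned NumGPRSpills = 8, NumFPRSpills = 32;   // hypothetical spill counts
  EstimatedStackSize += 4 * (NumGPRSpills + NumFPRSpills);
  EstimatedStackSize += 16;                       // possible padding, as above
  unsigned EstimatedRSStackSizeLimit = 1020;      // hypothetical offset limit
  // 900 + 160 + 16 = 1076 >= 1020, so a scratch register or spill slot is
  // reserved for the register scavenger.
  assert(EstimatedStackSize >= EstimatedRSStackSizeLimit);
  return 0;
}
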
2031 
2032 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2033  MachineFunction &MF, MachineBasicBlock &MBB,
2034  MachineBasicBlock::iterator I) const {
2035  const ARMBaseInstrInfo &TII =
2036  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2037  if (!hasReservedCallFrame(MF)) {
2038  // If we have alloca, convert as follows:
2039  // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2040  // ADJCALLSTACKUP -> add, sp, sp, amount
2041  MachineInstr &Old = *I;
2042  DebugLoc dl = Old.getDebugLoc();
2043  unsigned Amount = TII.getFrameSize(Old);
2044  if (Amount != 0) {
2045  // We need to keep the stack aligned properly. To do this, we round the
2046  // amount of space needed for the outgoing arguments up to the next
2047  // alignment boundary.
2048  Amount = alignSPAdjust(Amount);
2049 
2050  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2051  assert(!AFI->isThumb1OnlyFunction() &&
2052  "This eliminateCallFramePseudoInstr does not support Thumb1!");
2053  bool isARM = !AFI->isThumbFunction();
2054 
2055  // Replace the pseudo instruction with a new instruction...
2056  unsigned Opc = Old.getOpcode();
2057  int PIdx = Old.findFirstPredOperandIdx();
2058  ARMCC::CondCodes Pred =
2059  (PIdx == -1) ? ARMCC::AL
2060  : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
2061  unsigned PredReg = TII.getFramePred(Old);
2062  if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2063  emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2064  Pred, PredReg);
2065  } else {
2066  assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2067  emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2068  Pred, PredReg);
2069  }
2070  }
2071  }
2072  return MBB.erase(I);
2073 }
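
The rounding applied to the call-frame adjustment above behaves like a plain round-up-to-alignment. Below is a standalone sketch with an assumed 8-byte stack alignment (this is not the LLVM API, just the arithmetic).

#include <cassert>
#include <cstdint>

static uint32_t roundUpTo(uint32_t Amount, uint32_t Align) {
  return (Amount + Align - 1) & ~(Align - 1);
}

int main() {
  // With 8-byte alignment, ADJCALLSTACKDOWN 10 is lowered to "sub sp, sp, #16"
  // and the matching ADJCALLSTACKUP to "add sp, sp, #16".
  assert(roundUpTo(10, 8) == 16);
  assert(roundUpTo(16, 8) == 16);
  return 0;
}
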
2074 
2075 /// Get the minimum constant for ARM that is greater than or equal to the
2076 /// argument. In ARM, constants can have any value that can be produced by
2077 /// rotating an 8-bit value to the right by an even number of bits within a
2078 /// 32-bit word.
2079 static uint32_t alignToARMConstant(uint32_t Value) {
2080  unsigned Shifted = 0;
2081 
2082  if (Value == 0)
2083  return 0;
2084 
2085  while (!(Value & 0xC0000000)) {
2086  Value = Value << 2;
2087  Shifted += 2;
2088  }
2089 
2090  bool Carry = (Value & 0x00FFFFFF);
2091  Value = ((Value & 0xFF000000) >> 24) + Carry;
2092 
2093  if (Value & 0x0000100)
2094  Value = Value & 0x000001FC;
2095 
2096  if (Shifted > 24)
2097  Value = Value >> (Shifted - 24);
2098  else
2099  Value = Value << (24 - Shifted);
2100 
2101  return Value;
2102 }
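
A minimal standalone sketch (not part of this file) checking the property described above: an ARM modified immediate is an 8-bit value rotated right by an even amount. The sample values were computed by hand from the algorithm above and are illustrative assumptions, not authoritative test vectors.

#include <cassert>
#include <cstdint>

// True if V equals some 8-bit value rotated right by an even amount.
static bool isARMModifiedImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2) {
    // Rotating left by R undoes a right-rotation by R.
    uint32_t Rotl = (R == 0) ? V : ((V << R) | (V >> (32 - R)));
    if (Rotl <= 0xFF)
      return true;
  }
  return false;
}

int main() {
  // 0x1234 is not encodable; alignToARMConstant rounds it up to 0x1240,
  // which is 0x49 rotated right by 26 bits (i.e. 0x49 << 6).
  assert(!isARMModifiedImm(0x1234));
  assert(isARMModifiedImm(0x1240) && 0x1240 >= 0x1234);
  // Values that already fit in 8 bits are returned unchanged.
  assert(isARMModifiedImm(0xFF));
  return 0;
}
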
2103 
2104 // The stack limit in the TCB is set to this many bytes above the actual
2105 // stack limit.
2106 static const uint64_t kSplitStackAvailable = 256;
2107 
2108 // Adjust the function prologue to enable split stacks. This currently only
2109 // supports android and linux.
2110 //
2111 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2112 // must be well defined in order to allow for consistent implementations of the
2113 // __morestack helper function. The ABI is also not a normal ABI in that it
2114 // doesn't follow the normal calling conventions because this allows the
2115 // prologue of each function to be optimized further.
2116 //
2117 // Currently, the ABI looks like (when calling __morestack)
2118 //
2119 // * r4 holds the minimum stack size requested for this function call
2120 // * r5 holds the stack size of the arguments to the function
2121 // * the beginning of the function is 3 instructions after the call to
2122 // __morestack
2123 //
2124 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2125 // place the arguments on to the new stack, and the 3-instruction knowledge to
2126 // jump directly to the body of the function when working on the new stack.
2127 //
2128 // An old (and possibly no longer compatible) implementation of __morestack for
2129 // ARM can be found at [1].
2130 //
2131 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
2132 void ARMFrameLowering::adjustForSegmentedStacks(
2133  MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2134  unsigned Opcode;
2135  unsigned CFIIndex;
2136  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2137  bool Thumb = ST->isThumb();
2138 
2139  // Sadly, this currently doesn't support varargs or platforms other than
2140  // android/linux. Note that thumb1/thumb2 are supported on android/linux.
2141  if (MF.getFunction().isVarArg())
2142  report_fatal_error("Segmented stacks do not support vararg functions.");
2143  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2144  report_fatal_error("Segmented stacks not supported on this platform.");
2145 
2146  MachineFrameInfo &MFI = MF.getFrameInfo();
2147  MachineModuleInfo &MMI = MF.getMMI();
2148  MCContext &Context = MMI.getContext();
2149  const MCRegisterInfo *MRI = Context.getRegisterInfo();
2150  const ARMBaseInstrInfo &TII =
2151  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2152  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
2153  DebugLoc DL;
2154 
2155  uint64_t StackSize = MFI.getStackSize();
2156 
2157  // Do not generate a prologue for leaf functions with a stack of size zero.
2158  // For non-leaf functions we have to allow for the possibility that the
2159  // call is to a non-split function, as in PR37807. This function could also
2160  // take the address of a non-split function. When the linker tries to adjust
2161  // its non-existent prologue, it would fail with an error. Mark the object
2162  // file so that such failures are not errors. See this Go language bug-report
2163  // https://go-review.googlesource.com/c/go/+/148819/
2164  if (StackSize == 0 && !MFI.hasTailCall()) {
2165  MF.getMMI().setHasNosplitStack(true);
2166  return;
2167  }
2168 
2169  // Use R4 and R5 as scratch registers.
2170  // We save R4 and R5 before use and restore them before leaving the function.
2171  unsigned ScratchReg0 = ARM::R4;
2172  unsigned ScratchReg1 = ARM::R5;
2173  uint64_t AlignedStackSize;
2174 
2175  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2176  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2177  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
2178  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
2179  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
2180 
2181  // Grab everything that reaches PrologueMBB to update their liveness as well.
2182  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2183  SmallVector<MachineBasicBlock *, 8> WalkList;
2184  WalkList.push_back(&PrologueMBB);
2185 
2186  do {
2187  MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2188  for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2189  if (BeforePrologueRegion.insert(PredBB).second)
2190  WalkList.push_back(PredBB);
2191  }
2192  } while (!WalkList.empty());
2193 
2194  // The order in that list is important.
2195  // The blocks will all be inserted before PrologueMBB using that order.
2196  // Therefore the block that should appear first in the CFG should appear
2197  // first in the list.
2198  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2199  PostStackMBB};
2200 
2201  for (MachineBasicBlock *B : AddedBlocks)
2202  BeforePrologueRegion.insert(B);
2203 
2204  for (const auto &LI : PrologueMBB.liveins()) {
2205  for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2206  PredBB->addLiveIn(LI);
2207  }
2208 
2209  // Remove the newly added blocks from the list, since we know
2210  // we do not have to do the following updates for them.
2211  for (MachineBasicBlock *B : AddedBlocks) {
2212  BeforePrologueRegion.erase(B);
2213  MF.insert(PrologueMBB.getIterator(), B);
2214  }
2215 
2216  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2217  // Make sure the LiveIns are still sorted and unique.
2218  MBB->sortUniqueLiveIns();
2219  // Replace the edges to PrologueMBB by edges to the sequences
2220  // we are about to add.
2221  MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2222  }
2223 
2224  // The required stack size, rounded up so it is encodable as an ARM constant.
2225  AlignedStackSize = alignToARMConstant(StackSize);
2226 
2227  // When the frame size is less than 256 we just compare the stack
2228  // boundary directly to the value of the stack pointer, per gcc.
2229  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
2230 
2231  // We will use two of the callee save registers as scratch registers so we
2232  // need to save those registers onto the stack.
2233  // We will use SR0 to hold stack limit and SR1 to hold the stack size
2234  // requested and arguments for __morestack().
2235  // SR0: Scratch Register #0
2236  // SR1: Scratch Register #1
2237  // push {SR0, SR1}
2238  if (Thumb) {
2239  BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
2240  .add(predOps(ARMCC::AL))
2241  .addReg(ScratchReg0)
2242  .addReg(ScratchReg1);
2243  } else {
2244  BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
2245  .addReg(ARM::SP, RegState::Define)
2246  .addReg(ARM::SP)
2247  .add(predOps(ARMCC::AL))
2248  .addReg(ScratchReg0)
2249  .addReg(ScratchReg1);
2250  }
2251 
2252  // Emit the relevant DWARF information about the change in stack pointer as
2253  // well as where to find both r4 and r5 (the callee-save registers)
2254  CFIIndex =
2255  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
2256  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2257  .addCFIIndex(CFIIndex);
2258  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2259  nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
2260  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2261  .addCFIIndex(CFIIndex);
2262  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2263  nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
2264  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2265  .addCFIIndex(CFIIndex);
2266 
2267  // mov SR1, sp
2268  if (Thumb) {
2269  BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
2270  .addReg(ARM::SP)
2271  .add(predOps(ARMCC::AL));
2272  } else if (CompareStackPointer) {
2273  BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
2274  .addReg(ARM::SP)
2275  .add(predOps(ARMCC::AL))
2276  .add(condCodeOp());
2277  }
2278 
2279  // sub SR1, sp, #StackSize
2280  if (!CompareStackPointer && Thumb) {
2281  BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
2282  .add(condCodeOp())
2283  .addReg(ScratchReg1)
2284  .addImm(AlignedStackSize)
2285  .add(predOps(ARMCC::AL));
2286  } else if (!CompareStackPointer) {
2287  BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
2288  .addReg(ARM::SP)
2289  .addImm(AlignedStackSize)
2290  .add(predOps(ARMCC::AL))
2291  .add(condCodeOp());
2292  }
2293 
2294  if (Thumb && ST->isThumb1Only()) {
2295  unsigned PCLabelId = ARMFI->createPICLabelUId();
2296  ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
2297  MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
2298  MachineConstantPool *MCP = MF.getConstantPool();
2299  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
2300 
2301  // ldr SR0, [pc, offset(STACK_LIMIT)]
2302  BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
2303  .addConstantPoolIndex(CPI)
2304  .add(predOps(ARMCC::AL));
2305 
2306  // ldr SR0, [SR0]
2307  BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
2308  .addReg(ScratchReg0)
2309  .addImm(0)
2310  .add(predOps(ARMCC::AL));
2311  } else {
2312  // Get TLS base address from the coprocessor
2313  // mrc p15, #0, SR0, c13, c0, #3
2314  BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
2315  .addImm(15)
2316  .addImm(0)
2317  .addImm(13)
2318  .addImm(0)
2319  .addImm(3)
2320  .add(predOps(ARMCC::AL));
2321 
2322  // Use the last tls slot on android and a private field of the TCB on linux.
2323  assert(ST->isTargetAndroid() || ST->isTargetLinux());
2324  unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
2325 
2326  // Get the stack limit from the right offset
2327  // ldr SR0, [sr0, #4 * TlsOffset]
2328  BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
2329  .addReg(ScratchReg0)
2330  .addImm(4 * TlsOffset)
2331  .add(predOps(ARMCC::AL));
2332  }
2333 
2334  // Compare stack limit with stack size requested.
2335  // cmp SR0, SR1
2336  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
2337  BuildMI(GetMBB, DL, TII.get(Opcode))
2338  .addReg(ScratchReg0)
2339  .addReg(ScratchReg1)
2340  .add(predOps(ARMCC::AL));
2341 
2342  // This jump is taken if StackLimit < SP - stack required.
2343  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
2344  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
2345  .addImm(ARMCC::LO)
2346  .addReg(ARM::CPSR);
2347 
2348 
2349  // Calling __morestack(StackSize, Size of stack arguments).
2350  // __morestack knows that the stack size requested is in SR0 (r4)
2351  // and the size of the stack arguments is in SR1 (r5).
2352 
2353  // Pass the first argument to __morestack in Scratch Register #0:
2354  // the amount of stack required.
2355  if (Thumb) {
2356  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
2357  .add(condCodeOp())
2358  .addImm(AlignedStackSize)
2359  .add(predOps(ARMCC::AL));
2360  } else {
2361  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
2362  .addImm(AlignedStackSize)
2363  .add(predOps(ARMCC::AL))
2364  .add(condCodeOp());
2365  }
2366  // Pass the second argument to __morestack in Scratch Register #1:
2367  // the amount of stack used to hold the function arguments.
2368  if (Thumb) {
2369  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
2370  .add(condCodeOp())
2371  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2372  .add(predOps(ARMCC::AL));
2373  } else {
2374  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
2375  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2376  .add(predOps(ARMCC::AL))
2377  .add(condCodeOp());
2378  }
2379 
2380  // push {lr} - Save return address of this function.
2381  if (Thumb) {
2382  BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
2383  .add(predOps(ARMCC::AL))
2384  .addReg(ARM::LR);
2385  } else {
2386  BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
2387  .addReg(ARM::SP, RegState::Define)
2388  .addReg(ARM::SP)
2389  .add(predOps(ARMCC::AL))
2390  .addReg(ARM::LR);
2391  }
2392 
2393  // Emit the DWARF info about the change in stack as well as where to find the
2394  // previous link register
2395  CFIIndex =
2396  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
2397  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2398  .addCFIIndex(CFIIndex);
2399  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2400  nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
2401  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2402  .addCFIIndex(CFIIndex);
2403 
2404  // Call __morestack().
2405  if (Thumb) {
2406  BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
2407  .add(predOps(ARMCC::AL))
2408  .addExternalSymbol("__morestack");
2409  } else {
2410  BuildMI(AllocMBB, DL, TII.get(ARM::BL))
2411  .addExternalSymbol("__morestack");
2412  }
2413 
2414  // pop {lr} - Restore return address of this original function.
2415  if (Thumb) {
2416  if (ST->isThumb1Only()) {
2417  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2418  .add(predOps(ARMCC::AL))
2419  .addReg(ScratchReg0);
2420  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
2421  .addReg(ScratchReg0)
2422  .add(predOps(ARMCC::AL));
2423  } else {
2424  BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
2425  .addReg(ARM::LR, RegState::Define)
2426  .addReg(ARM::SP, RegState::Define)
2427  .addReg(ARM::SP)
2428  .addImm(4)
2429  .add(predOps(ARMCC::AL));
2430  }
2431  } else {
2432  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2433  .addReg(ARM::SP, RegState::Define)
2434  .addReg(ARM::SP)
2435  .add(predOps(ARMCC::AL))
2436  .addReg(ARM::LR);
2437  }
2438 
2439  // Restore SR0 and SR1 in case __morestack() was called.
2440  // __morestack() will skip the PostStackMBB block so we need to restore
2441  // the scratch registers from here.
2442  // pop {SR0, SR1}
2443  if (Thumb) {
2444  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2445  .add(predOps(ARMCC::AL))
2446  .addReg(ScratchReg0)
2447  .addReg(ScratchReg1);
2448  } else {
2449  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2450  .addReg(ARM::SP, RegState::Define)
2451  .addReg(ARM::SP)
2452  .add(predOps(ARMCC::AL))
2453  .addReg(ScratchReg0)
2454  .addReg(ScratchReg1);
2455  }
2456 
2457  // Update the CFA offset now that we've popped
2458  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2459  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2460  .addCFIIndex(CFIIndex);
2461 
2462  // Return from this function.
2463  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
2464 
2465  // Restore SR0 and SR1 in case __morestack() was not called.
2466  // pop {SR0, SR1}
2467  if (Thumb) {
2468  BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
2469  .add(predOps(ARMCC::AL))
2470  .addReg(ScratchReg0)
2471  .addReg(ScratchReg1);
2472  } else {
2473  BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
2474  .addReg(ARM::SP, RegState::Define)
2475  .addReg(ARM::SP)
2476  .add(predOps(ARMCC::AL))
2477  .addReg(ScratchReg0)
2478  .addReg(ScratchReg1);
2479  }
2480 
2481  // Update the CFA offset now that we've popped
2482  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2483  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2484  .addCFIIndex(CFIIndex);
2485 
2486  // Tell debuggers that r4 and r5 are now the same as they were in the
2487  // previous function, that they're the "Same Value".
2488  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2489  nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
2490  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2491  .addCFIIndex(CFIIndex);
2492  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2493  nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
2494  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2495  .addCFIIndex(CFIIndex);
2496 
2497  // Wire up the successor lists of the newly inserted blocks.
2498  PostStackMBB->addSuccessor(&PrologueMBB);
2499 
2500  AllocMBB->addSuccessor(PostStackMBB);
2501 
2502  GetMBB->addSuccessor(PostStackMBB);
2503  GetMBB->addSuccessor(AllocMBB);
2504 
2505  McrMBB->addSuccessor(GetMBB);
2506 
2507  PrevStackMBB->addSuccessor(McrMBB);
2508 
2509 #ifdef EXPENSIVE_CHECKS
2510  MF.verify();
2511 #endif
2512 }
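
A standalone sketch (illustrative only, with made-up values) of the run-time decision the emitted prologue makes: the stack limit loaded from TLS is compared against SP minus the aligned frame size, and the unsigned-lower branch above skips __morestack when there is enough room.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t SP = 0x7fff0000;            // hypothetical stack pointer
  uint32_t StackLimit = 0x7ffe0000;    // hypothetical limit read from TLS
  uint32_t AlignedStackSize = 0x1240;  // frame size rounded by alignToARMConstant
  // Mirrors "cmp SR0, SR1; blo PostStackMBB" with SR0 = limit, SR1 = SP - size.
  bool SkipMorestack = StackLimit < SP - AlignedStackSize;
  assert(SkipMorestack);               // enough stack; __morestack is not called
  return 0;
}
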