1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the ARM implementation of TargetFrameLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMFrameLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMConstantPoolValue.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "MCTargetDesc/ARMBaseInfo.h"
22 #include "Utils/ARMBaseInfo.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/MC/MCContext.h"
46 #include "llvm/MC/MCDwarf.h"
47 #include "llvm/MC/MCInstrDesc.h"
48 #include "llvm/MC/MCRegisterInfo.h"
49 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstddef>
61 #include <cstdint>
62 #include <iterator>
63 #include <utility>
64 #include <vector>
65 
66 #define DEBUG_TYPE "arm-frame-lowering"
67 
68 using namespace llvm;
69 
70 static cl::opt<bool>
71 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
72  cl::desc("Align ARM NEON spills in prolog and epilog"));
73 
74 static MachineBasicBlock::iterator
75 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
76                         unsigned NumAlignedDPRCS2Regs);
77 
78 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
79     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
80  STI(sti) {}
81 
82 bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
83   // iOS always has a FP for backtracking, force other targets to keep their FP
84  // when doing FastISel. The emitted code is currently superior, and in cases
85  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
86   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
87          MF.getSubtarget<ARMSubtarget>().useFastISel();
88 }
89 
90 /// Returns true if the target can safely skip saving callee-saved registers
91 /// for noreturn nounwind functions.
92 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
93   assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
94  MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
95  !MF.getFunction().hasFnAttribute(Attribute::UWTable));
96 
97  // Frame pointer and link register are not treated as normal CSR, thus we
98  // can always skip CSR saves for nonreturning functions.
99  return true;
100 }
101 
102 /// hasFP - Return true if the specified function should have a dedicated frame
103 /// pointer register. This is true if the function has variable sized allocas
104 /// or if frame pointer elimination is disabled.
105 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
106   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
107  const MachineFrameInfo &MFI = MF.getFrameInfo();
108 
109  // ABI-required frame pointer.
110   if (MF.getTarget().Options.DisableFramePointerElim(MF))
111     return true;
112 
113  // Frame pointer required for use within this function.
114  return (RegInfo->needsStackRealignment(MF) ||
115  MFI.hasVarSizedObjects() ||
116  MFI.isFrameAddressTaken());
117 }
118 
119 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
120 /// not required, we reserve argument space for call sites in the function
121 /// immediately on entry to the current function. This eliminates the need for
122 /// add/sub sp brackets around call sites. Returns true if the call frame is
123 /// included as part of the stack frame.
124 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
125   const MachineFrameInfo &MFI = MF.getFrameInfo();
126  unsigned CFSize = MFI.getMaxCallFrameSize();
127  // It's not always a good idea to include the call frame as part of the
128   // stack frame. ARM (especially Thumb) has a small immediate offset range
129   // for addressing the stack frame, so a large call frame can cause poor
130   // codegen and may even make it impossible to scavenge a register.
131  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
132  return false;
133 
134  return !MFI.hasVarSizedObjects();
135 }
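// For example, with the 12-bit ARM immediate used above the cutoff is
// ((1 << 12) - 1) / 2 = 2047 bytes: a function whose largest outgoing call
// frame needs 2047 bytes or more (or that has variable-sized objects) keeps
// explicit sub/add sp adjustments around its call sites instead of folding
// that space into the fixed stack frame.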
136 
137 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
138 /// call frame pseudos can be simplified. Unlike most targets, having a FP
139 /// is not sufficient here since we still may reference some objects via SP
140 /// even when FP is available in Thumb2 mode.
141 bool
142 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
143   return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
144 }
145 
146 static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
147                         const MCPhysReg *CSRegs) {
148  // Integer spill area is handled with "pop".
149  if (isPopOpcode(MI.getOpcode())) {
150  // The first two operands are predicates. The last two are
151  // imp-def and imp-use of SP. Check everything in between.
152  for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
153  if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
154  return false;
155  return true;
156  }
157  if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
158  MI.getOpcode() == ARM::LDR_POST_REG ||
159  MI.getOpcode() == ARM::t2LDR_POST) &&
160  isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
161  MI.getOperand(1).getReg() == ARM::SP)
162  return true;
163 
164  return false;
165 }
166 
167 static void emitRegPlusImmediate(
168     bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
169  const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
170  unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
171  ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
172  if (isARM)
173  emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
174  Pred, PredReg, TII, MIFlags);
175  else
176  emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
177  Pred, PredReg, TII, MIFlags);
178 }
179 
180 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
181  MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
182  const ARMBaseInstrInfo &TII, int NumBytes,
183  unsigned MIFlags = MachineInstr::NoFlags,
184                          ARMCC::CondCodes Pred = ARMCC::AL,
185                          unsigned PredReg = 0) {
186  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
187  MIFlags, Pred, PredReg);
188 }
189 
190 static int sizeOfSPAdjustment(const MachineInstr &MI) {
191  int RegSize;
192  switch (MI.getOpcode()) {
193  case ARM::VSTMDDB_UPD:
194  RegSize = 8;
195  break;
196  case ARM::STMDB_UPD:
197  case ARM::t2STMDB_UPD:
198  RegSize = 4;
199  break;
200  case ARM::t2STR_PRE:
201  case ARM::STR_PRE_IMM:
202  return 4;
203  default:
204  llvm_unreachable("Unknown push or pop like instruction");
205  }
206 
207  int count = 0;
208  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
209  // pred) so the list starts at 4.
210  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
211  count += RegSize;
212  return count;
213 }
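// For example, a prologue vpush {d8, d9, d10, d11} (VSTMDDB_UPD) carries the
// two explicit sp operands plus the predicate in front of four D-register
// operands, so the loop above reports 4 * 8 = 32 bytes of SP adjustment,
// while a single str lr, [sp, #-4]! (STR_PRE_IMM) reports 4.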
214 
215 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
216                                       size_t StackSizeInBytes) {
217  const MachineFrameInfo &MFI = MF.getFrameInfo();
218  const Function &F = MF.getFunction();
219  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
220  if (F.hasFnAttribute("stack-probe-size"))
221  F.getFnAttribute("stack-probe-size")
222         .getValueAsString()
223         .getAsInteger(0, StackProbeSize);
224  return (StackSizeInBytes >= StackProbeSize) &&
225  !F.hasFnAttribute("no-stack-arg-probe");
226 }
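// For example, with the default 4096-byte probe size (4080 when a stack
// protector slot exists), a Windows target function with 8 KiB of locals gets
// a __chkstk probe in its prologue unless it carries "no-stack-arg-probe" or
// overrides the threshold via the "stack-probe-size" attribute.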
227 
228 namespace {
229 
230 struct StackAdjustingInsts {
231  struct InstInfo {
232     MachineBasicBlock::iterator I;
233     unsigned SPAdjust;
234  bool BeforeFPSet;
235  };
236 
237   SmallVector<InstInfo, 4> Insts;
238 
239  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
240  bool BeforeFPSet = false) {
241  InstInfo Info = {I, SPAdjust, BeforeFPSet};
242  Insts.push_back(Info);
243  }
244 
245  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
246  auto Info =
247  llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
248  assert(Info != Insts.end() && "invalid sp adjusting instruction");
249  Info->SPAdjust += ExtraBytes;
250  }
251 
252  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
253  const ARMBaseInstrInfo &TII, bool HasFP) {
254  MachineFunction &MF = *MBB.getParent();
255  unsigned CFAOffset = 0;
256  for (auto &Info : Insts) {
257  if (HasFP && !Info.BeforeFPSet)
258  return;
259 
260  CFAOffset -= Info.SPAdjust;
261  unsigned CFIIndex = MF.addFrameInst(
262  MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
263  BuildMI(MBB, std::next(Info.I), dl,
264  TII.get(TargetOpcode::CFI_INSTRUCTION))
265  .addCFIIndex(CFIIndex)
266           .setMIFlags(MachineInstr::FrameSetup);
267     }
268  }
269 };
270 
271 } // end anonymous namespace
272 
273 /// Emit an instruction sequence that will align the address in
274 /// register Reg by zero-ing out the lower bits. For versions of the
275 /// architecture that support Neon, this must be done in a single
276 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
277 /// single instruction. That function only gets called when optimizing
278 /// spilling of D registers on a core with the Neon instruction set
279 /// present.
280 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
281                                      const TargetInstrInfo &TII,
282                                      MachineBasicBlock &MBB,
283                                      MachineBasicBlock::iterator MBBI,
284                                      const DebugLoc &DL, const unsigned Reg,
285  const unsigned Alignment,
286  const bool MustBeSingleInstruction) {
287  const ARMSubtarget &AST =
288  static_cast<const ARMSubtarget &>(MF.getSubtarget());
289  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
290  const unsigned AlignMask = Alignment - 1;
291  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
292  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
293  if (!AFI->isThumbFunction()) {
294  // if the BFC instruction is available, use that to zero the lower
295  // bits:
296  // bfc Reg, #0, log2(Alignment)
297  // otherwise use BIC, if the mask to zero the required number of bits
298  // can be encoded in the bic immediate field
299  // bic Reg, Reg, Alignment-1
300  // otherwise, emit
301  // lsr Reg, Reg, log2(Alignment)
302  // lsl Reg, Reg, log2(Alignment)
303  if (CanUseBFC) {
304  BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
305  .addReg(Reg, RegState::Kill)
306  .addImm(~AlignMask)
307  .add(predOps(ARMCC::AL));
308  } else if (AlignMask <= 255) {
309  BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
310  .addReg(Reg, RegState::Kill)
311  .addImm(AlignMask)
312           .add(predOps(ARMCC::AL))
313           .add(condCodeOp());
314  } else {
315  assert(!MustBeSingleInstruction &&
316  "Shouldn't call emitAligningInstructions demanding a single "
317  "instruction to be emitted for large stack alignment for a target "
318  "without BFC.");
319  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
320  .addReg(Reg, RegState::Kill)
321  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
322           .add(predOps(ARMCC::AL))
323           .add(condCodeOp());
324  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
325  .addReg(Reg, RegState::Kill)
326  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
327           .add(predOps(ARMCC::AL))
328           .add(condCodeOp());
329  }
330  } else {
331  // Since this is only reached for Thumb-2 targets, the BFC instruction
332  // should always be available.
333  assert(CanUseBFC);
334  BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
335  .addReg(Reg, RegState::Kill)
336  .addImm(~AlignMask)
337  .add(predOps(ARMCC::AL));
338  }
339 }
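// For a 16-byte alignment request on the ARM (non-Thumb) path, the cases above
// emit, in order of preference:
//   bfc r4, #0, #4     (v6T2/v7 targets, single instruction)
//   bic r4, r4, #15    (mask fits in the bic immediate field)
//   lsr r4, r4, #4
//   lsl r4, r4, #4     (fallback; never allowed when a single instruction
//                       is required)
// with r4 standing in for whatever scratch register the caller passed in Reg.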
340 
341 /// We need the offset of the frame pointer relative to other MachineFrameInfo
342 /// offsets which are encoded relative to SP at function begin.
343 /// See also emitPrologue() for how the FP is set up.
344 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
345 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
346 /// this to produce a conservative estimate that we check in an assert() later.
347 static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
348  // This is a conservative estimation: Assume the frame pointer being r7 and
349  // pc("r15") up to r8 getting spilled before (= 8 registers).
350  return -AFI.getArgRegsSaveSize() - (8 * 4);
351 }
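// Worked example: for an AAPCS vararg function that saves 16 bytes of argument
// registers, the estimate is -16 - (8 * 4) = -48, i.e. the frame pointer spill
// slot is assumed to land no more than 48 bytes below the incoming SP. The
// assert in emitPrologue checks that the offset assigned later is >= this bound.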
352 
353 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
354                                     MachineBasicBlock &MBB) const {
355  MachineBasicBlock::iterator MBBI = MBB.begin();
356  MachineFrameInfo &MFI = MF.getFrameInfo();
357   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
358   MachineModuleInfo &MMI = MF.getMMI();
359  MCContext &Context = MMI.getContext();
360  const TargetMachine &TM = MF.getTarget();
361  const MCRegisterInfo *MRI = Context.getRegisterInfo();
362  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
363  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
364  assert(!AFI->isThumb1OnlyFunction() &&
365  "This emitPrologue does not support Thumb1!");
366  bool isARM = !AFI->isThumbFunction();
367   unsigned Align = STI.getFrameLowering()->getStackAlignment();
368   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
369  unsigned NumBytes = MFI.getStackSize();
370  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
371 
372  // Debug location must be unknown since the first debug location is used
373  // to determine the end of the prologue.
374  DebugLoc dl;
375 
376  unsigned FramePtr = RegInfo->getFrameRegister(MF);
377 
378  // Determine the sizes of each callee-save spill areas and record which frame
379  // belongs to which callee-save spill areas.
380  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
381  int FramePtrSpillFI = 0;
382  int D8SpillFI = 0;
383 
384  // All calls are tail calls in GHC calling conv, and functions have no
385  // prologue/epilogue.
386   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
387     return;
388 
389  StackAdjustingInsts DefCFAOffsetCandidates;
390  bool HasFP = hasFP(MF);
391 
392  // Allocate the vararg register save area.
393  if (ArgRegsSaveSize) {
394  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
395                  MachineInstr::FrameSetup);
396     DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
397  }
398 
399  if (!AFI->hasStackFrame() &&
400  (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
401  if (NumBytes - ArgRegsSaveSize != 0) {
402  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
403                    MachineInstr::FrameSetup);
404       DefCFAOffsetCandidates.addInst(std::prev(MBBI),
405  NumBytes - ArgRegsSaveSize, true);
406  }
407  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
408  return;
409  }
410 
411  // Determine spill area sizes.
412  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
413  unsigned Reg = CSI[i].getReg();
414  int FI = CSI[i].getFrameIdx();
415  switch (Reg) {
416  case ARM::R8:
417  case ARM::R9:
418  case ARM::R10:
419  case ARM::R11:
420  case ARM::R12:
421  if (STI.splitFramePushPop(MF)) {
422  GPRCS2Size += 4;
423  break;
424  }
425       LLVM_FALLTHROUGH;
426     case ARM::R0:
427  case ARM::R1:
428  case ARM::R2:
429  case ARM::R3:
430  case ARM::R4:
431  case ARM::R5:
432  case ARM::R6:
433  case ARM::R7:
434  case ARM::LR:
435  if (Reg == FramePtr)
436  FramePtrSpillFI = FI;
437  GPRCS1Size += 4;
438  break;
439  default:
440  // This is a DPR. Exclude the aligned DPRCS2 spills.
441  if (Reg == ARM::D8)
442  D8SpillFI = FI;
443  if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
444  DPRCSSize += 8;
445  }
446  }
447 
448  // Move past area 1.
449  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
450  if (GPRCS1Size > 0) {
451  GPRCS1Push = LastPush = MBBI++;
452  DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
453  }
454 
455  // Determine starting offsets of spill areas.
456  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
457  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
458  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
459  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
460  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
461  int FramePtrOffsetInPush = 0;
462  if (HasFP) {
463  int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
464  assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset &&
465  "Max FP estimation is wrong");
466  FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
467  AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
468  NumBytes);
469  }
470  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
471  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
472  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
473 
474  // Move past area 2.
475  if (GPRCS2Size > 0) {
476  GPRCS2Push = LastPush = MBBI++;
477  DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
478  }
479 
480  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
481  // .cfi_offset operations will reflect that.
482  if (DPRGapSize) {
483  assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
484  if (LastPush != MBB.end() &&
485  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
486  DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
487  else {
488  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
489                    MachineInstr::FrameSetup);
490       DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
491  }
492  }
493 
494  // Move past area 3.
495  if (DPRCSSize > 0) {
496  // Since vpush register list cannot have gaps, there may be multiple vpush
497  // instructions in the prologue.
498  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
499  DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
500  LastPush = MBBI++;
501  }
502  }
503 
504  // Move past the aligned DPRCS2 area.
505  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
506     MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
507     // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
508  // leaves the stack pointer pointing to the DPRCS2 area.
509  //
510  // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
511  NumBytes += MFI.getObjectOffset(D8SpillFI);
512  } else
513  NumBytes = DPRCSOffset;
514 
515  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
516  uint32_t NumWords = NumBytes >> 2;
517 
518  if (NumWords < 65536)
519  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
520  .addImm(NumWords)
521           .setMIFlags(MachineInstr::FrameSetup)
522           .add(predOps(ARMCC::AL));
523  else
524  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
525  .addImm(NumWords)
526           .setMIFlags(MachineInstr::FrameSetup);
527 
528  switch (TM.getCodeModel()) {
529  case CodeModel::Tiny:
530  llvm_unreachable("Tiny code model not available on ARM.");
531  case CodeModel::Small:
532  case CodeModel::Medium:
533  case CodeModel::Kernel:
534  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
535           .add(predOps(ARMCC::AL))
536           .addExternalSymbol("__chkstk")
537  .addReg(ARM::R4, RegState::Implicit)
538  .setMIFlags(MachineInstr::FrameSetup);
539  break;
540  case CodeModel::Large:
541  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
542  .addExternalSymbol("__chkstk")
543           .setMIFlags(MachineInstr::FrameSetup);
544 
545  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
546           .add(predOps(ARMCC::AL))
547           .addReg(ARM::R12, RegState::Kill)
548  .addReg(ARM::R4, RegState::Implicit)
549  .setMIFlags(MachineInstr::FrameSetup);
550  break;
551  }
552 
553  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
554  .addReg(ARM::SP, RegState::Kill)
555         .addReg(ARM::R4, RegState::Kill)
556         .setMIFlags(MachineInstr::FrameSetup)
557         .add(predOps(ARMCC::AL))
558         .add(condCodeOp());
559  NumBytes = 0;
560  }
561 
562  if (NumBytes) {
563  // Adjust SP after all the callee-save spills.
564  if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
565  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
566  DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
567  else {
568  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
569                    MachineInstr::FrameSetup);
570       DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
571  }
572 
573  if (HasFP && isARM)
574  // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
575  // Note it's not safe to do this in Thumb2 mode because it would have
576  // taken two instructions:
577  // mov sp, r7
578  // sub sp, #24
579  // If an interrupt is taken between the two instructions, then sp is in
580  // an inconsistent state (pointing to the middle of callee-saved area).
581  // The interrupt handler can end up clobbering the registers.
582  AFI->setShouldRestoreSPFromFP(true);
583  }
584 
585  // Set FP to point to the stack slot that contains the previous FP.
586  // For iOS, FP is R7, which has now been stored in spill area 1.
587  // Otherwise, if this is not iOS, all the callee-saved registers go
588  // into spill area 1, including the FP in R11. In either case, it
589  // is in area one and the adjustment needs to take place just after
590  // that push.
591  if (HasFP) {
592  MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
593  unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
594  emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
595  dl, TII, FramePtr, ARM::SP,
596  PushSize + FramePtrOffsetInPush,
597                          MachineInstr::FrameSetup);
598     if (FramePtrOffsetInPush + PushSize != 0) {
599  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
600  nullptr, MRI->getDwarfRegNum(FramePtr, true),
601  -(ArgRegsSaveSize - FramePtrOffsetInPush)));
602  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
603  .addCFIIndex(CFIIndex)
604           .setMIFlags(MachineInstr::FrameSetup);
605     } else {
606  unsigned CFIIndex =
607           MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
608               nullptr, MRI->getDwarfRegNum(FramePtr, true)));
609  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
610  .addCFIIndex(CFIIndex)
611           .setMIFlags(MachineInstr::FrameSetup);
612     }
613  }
614 
615  // Now that the prologue's actual instructions are finalised, we can insert
616  // the necessary DWARF cf instructions to describe the situation. Start by
617  // recording where each register ended up:
618  if (GPRCS1Size > 0) {
619  MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
620  int CFIIndex;
621  for (const auto &Entry : CSI) {
622  unsigned Reg = Entry.getReg();
623  int FI = Entry.getFrameIdx();
624  switch (Reg) {
625  case ARM::R8:
626  case ARM::R9:
627  case ARM::R10:
628  case ARM::R11:
629  case ARM::R12:
630  if (STI.splitFramePushPop(MF))
631  break;
632       LLVM_FALLTHROUGH;
633     case ARM::R0:
634  case ARM::R1:
635  case ARM::R2:
636  case ARM::R3:
637  case ARM::R4:
638  case ARM::R5:
639  case ARM::R6:
640  case ARM::R7:
641  case ARM::LR:
642       CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
643           nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
644  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
645  .addCFIIndex(CFIIndex)
646           .setMIFlags(MachineInstr::FrameSetup);
647       break;
648  }
649  }
650  }
651 
652  if (GPRCS2Size > 0) {
653  MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
654  for (const auto &Entry : CSI) {
655  unsigned Reg = Entry.getReg();
656  int FI = Entry.getFrameIdx();
657  switch (Reg) {
658  case ARM::R8:
659  case ARM::R9:
660  case ARM::R10:
661  case ARM::R11:
662  case ARM::R12:
663  if (STI.splitFramePushPop(MF)) {
664  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
665  unsigned Offset = MFI.getObjectOffset(FI);
666  unsigned CFIIndex = MF.addFrameInst(
667  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
668  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
669  .addCFIIndex(CFIIndex)
670             .setMIFlags(MachineInstr::FrameSetup);
671         }
672  break;
673  }
674  }
675  }
676 
677  if (DPRCSSize > 0) {
678  // Since vpush register list cannot have gaps, there may be multiple vpush
679  // instructions in the prologue.
680  MachineBasicBlock::iterator Pos = std::next(LastPush);
681  for (const auto &Entry : CSI) {
682  unsigned Reg = Entry.getReg();
683  int FI = Entry.getFrameIdx();
684  if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
685  (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
686  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
687  unsigned Offset = MFI.getObjectOffset(FI);
688  unsigned CFIIndex = MF.addFrameInst(
689  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
690  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
691  .addCFIIndex(CFIIndex)
692             .setMIFlags(MachineInstr::FrameSetup);
693       }
694  }
695  }
696 
697  // Now we can emit descriptions of where the canonical frame address was
698  // throughout the process. If we have a frame pointer, it takes over the job
699  // half-way through, so only the first few .cfi_def_cfa_offset instructions
700  // actually get emitted.
701  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
702 
703  if (STI.isTargetELF() && hasFP(MF))
704     MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
705                             AFI->getFramePtrSpillOffset());
706 
707  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
708  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
709  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
710  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
711 
712  // If we need dynamic stack realignment, do it here. Be paranoid and make
713  // sure if we also have VLAs, we have a base pointer for frame access.
714  // If aligned NEON registers were spilled, the stack has already been
715  // realigned.
716  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
717  unsigned MaxAlign = MFI.getMaxAlignment();
718  assert(!AFI->isThumb1OnlyFunction());
719  if (!AFI->isThumbFunction()) {
720  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
721  false);
722  } else {
723  // We cannot use sp as source/dest register here, thus we're using r4 to
724  // perform the calculations. We're emitting the following sequence:
725  // mov r4, sp
726  // -- use emitAligningInstructions to produce best sequence to zero
727  // -- out lower bits in r4
728  // mov sp, r4
729  // FIXME: It will be better just to find spare register here.
730  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
731  .addReg(ARM::SP, RegState::Kill)
732  .add(predOps(ARMCC::AL));
733  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
734  false);
735  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
736  .addReg(ARM::R4, RegState::Kill)
737  .add(predOps(ARMCC::AL));
738  }
739 
740  AFI->setShouldRestoreSPFromFP(true);
741  }
742 
743  // If we need a base pointer, set it up here. It's whatever the value
744  // of the stack pointer is at this point. Any variable size objects
745  // will be allocated after this, so we can still use the base pointer
746  // to reference locals.
747  // FIXME: Clarify FrameSetup flags here.
748  if (RegInfo->hasBasePointer(MF)) {
749  if (isARM)
750  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
751  .addReg(ARM::SP)
753  .add(condCodeOp());
754  else
755  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
756  .addReg(ARM::SP)
757  .add(predOps(ARMCC::AL));
758  }
759 
760  // If the frame has variable sized objects then the epilogue must restore
761  // the sp from fp. We can assume there's an FP here since hasFP already
762  // checks for hasVarSizedObjects.
763  if (MFI.hasVarSizedObjects())
764  AFI->setShouldRestoreSPFromFP(true);
765 }
766 
767 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
768                                     MachineBasicBlock &MBB) const {
769  MachineFrameInfo &MFI = MF.getFrameInfo();
770   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
771   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
772  const ARMBaseInstrInfo &TII =
773  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
774  assert(!AFI->isThumb1OnlyFunction() &&
775  "This emitEpilogue does not support Thumb1!");
776  bool isARM = !AFI->isThumbFunction();
777 
778  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
779  int NumBytes = (int)MFI.getStackSize();
780  unsigned FramePtr = RegInfo->getFrameRegister(MF);
781 
782  // All calls are tail calls in GHC calling conv, and functions have no
783  // prologue/epilogue.
784   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
785     return;
786 
787   // First put ourselves on the first (from top) terminator instruction.
788   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
789   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
790 
791  if (!AFI->hasStackFrame()) {
792  if (NumBytes - ArgRegsSaveSize != 0)
793  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
794  } else {
795  // Unwind MBBI to point to first LDR / VLDRD.
796  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
797  if (MBBI != MBB.begin()) {
798  do {
799  --MBBI;
800  } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
801  if (!isCSRestore(*MBBI, TII, CSRegs))
802  ++MBBI;
803  }
804 
805  // Move SP to start of FP callee save spill area.
806     NumBytes -= (ArgRegsSaveSize +
807                  AFI->getGPRCalleeSavedArea1Size() +
808                  AFI->getGPRCalleeSavedArea2Size() +
809                  AFI->getDPRCalleeSavedGapSize() +
810                  AFI->getDPRCalleeSavedAreaSize());
811 
812  // Reset SP based on frame pointer only if the stack frame extends beyond
813  // frame pointer stack slot or target is ELF and the function has FP.
814  if (AFI->shouldRestoreSPFromFP()) {
815  NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
816  if (NumBytes) {
817  if (isARM)
818  emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
819  ARMCC::AL, 0, TII);
820  else {
821  // It's not possible to restore SP from FP in a single instruction.
822  // For iOS, this looks like:
823  // mov sp, r7
824  // sub sp, #24
825  // This is bad, if an interrupt is taken after the mov, sp is in an
826  // inconsistent state.
827  // Use the first callee-saved register as a scratch register.
828  assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
829  "No scratch register to restore SP from FP!");
830  emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
831  ARMCC::AL, 0, TII);
832  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
833  .addReg(ARM::R4)
834  .add(predOps(ARMCC::AL));
835  }
836  } else {
837  // Thumb2 or ARM.
838  if (isARM)
839  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
840  .addReg(FramePtr)
841               .add(predOps(ARMCC::AL))
842               .add(condCodeOp());
843  else
844  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
845  .addReg(FramePtr)
846  .add(predOps(ARMCC::AL));
847  }
848  } else if (NumBytes &&
849  !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
850  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
851 
852  // Increment past our save areas.
853  if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
854  MBBI++;
855  // Since vpop register list cannot have gaps, there may be multiple vpop
856  // instructions in the epilogue.
857  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
858  MBBI++;
859  }
860  if (AFI->getDPRCalleeSavedGapSize()) {
861  assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
862  "unexpected DPR alignment gap");
863  emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
864  }
865 
866  if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
867  if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
868  }
869 
870  if (ArgRegsSaveSize)
871  emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
872 }
873 
874 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
875 /// debug info. It's the same as what we use for resolving the code-gen
876 /// references for now. FIXME: This can go wrong when references are
877 /// SP-relative and simple call frames aren't used.
878 int
879 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
880                                          unsigned &FrameReg) const {
881  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
882 }
883 
884 int
885 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
886                                              int FI, unsigned &FrameReg,
887  int SPAdj) const {
888  const MachineFrameInfo &MFI = MF.getFrameInfo();
889  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
890       MF.getSubtarget().getRegisterInfo());
891   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
892  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
893  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
894  bool isFixed = MFI.isFixedObjectIndex(FI);
895 
896  FrameReg = ARM::SP;
897  Offset += SPAdj;
898 
899  // SP can move around if there are allocas. We may also lose track of SP
900  // when emergency spilling inside a non-reserved call frame setup.
901  bool hasMovingSP = !hasReservedCallFrame(MF);
902 
903  // When dynamically realigning the stack, use the frame pointer for
904  // parameters, and the stack/base pointer for locals.
905  if (RegInfo->needsStackRealignment(MF)) {
906  assert(hasFP(MF) && "dynamic stack realignment without a FP!");
907  if (isFixed) {
908  FrameReg = RegInfo->getFrameRegister(MF);
909  Offset = FPOffset;
910  } else if (hasMovingSP) {
911  assert(RegInfo->hasBasePointer(MF) &&
912  "VLAs and dynamic stack alignment, but missing base pointer!");
913  FrameReg = RegInfo->getBaseRegister();
914  }
915  return Offset;
916  }
917 
918  // If there is a frame pointer, use it when we can.
919  if (hasFP(MF) && AFI->hasStackFrame()) {
920  // Use frame pointer to reference fixed objects. Use it for locals if
921  // there are VLAs (and thus the SP isn't reliable as a base).
922  if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
923  FrameReg = RegInfo->getFrameRegister(MF);
924  return FPOffset;
925  } else if (hasMovingSP) {
926  assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
927  if (AFI->isThumb2Function()) {
928  // Try to use the frame pointer if we can, else use the base pointer
929  // since it's available. This is handy for the emergency spill slot, in
930  // particular.
931  if (FPOffset >= -255 && FPOffset < 0) {
932  FrameReg = RegInfo->getFrameRegister(MF);
933  return FPOffset;
934  }
935  }
936  } else if (AFI->isThumbFunction()) {
937  // Prefer SP to base pointer, if the offset is suitably aligned and in
938  // range as the effective range of the immediate offset is bigger when
939  // basing off SP.
940  // Use add <rd>, sp, #<imm8>
941  // ldr <rd>, [sp, #<imm8>]
942  if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
943  return Offset;
944  // In Thumb2 mode, the negative offset is very limited. Try to avoid
945  // out of range references. ldr <rt>,[<rn>, #-<imm8>]
946  if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
947  FrameReg = RegInfo->getFrameRegister(MF);
948  return FPOffset;
949  }
950  } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
951  // Otherwise, use SP or FP, whichever is closer to the stack slot.
952  FrameReg = RegInfo->getFrameRegister(MF);
953  return FPOffset;
954  }
955  }
956  // Use the base pointer if we have one.
957  if (RegInfo->hasBasePointer(MF))
958  FrameReg = RegInfo->getBaseRegister();
959  return Offset;
960 }
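// In short, the base register is chosen as follows: with dynamic stack
// realignment, fixed (incoming-argument) objects use FP and locals use SP or
// the base pointer; otherwise FP is preferred when the object is fixed or SP
// may move, Thumb code keeps SP for word-aligned offsets up to 1020 bytes, and
// Thumb2 falls back to FP for small negative offsets (down to -255) to stay
// within the ldr/str immediate range.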
961 
962 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
963                                     MachineBasicBlock::iterator MI,
964                                     const std::vector<CalleeSavedInfo> &CSI,
965  unsigned StmOpc, unsigned StrOpc,
966  bool NoGap,
967  bool(*Func)(unsigned, bool),
968  unsigned NumAlignedDPRCS2Regs,
969  unsigned MIFlags) const {
970  MachineFunction &MF = *MBB.getParent();
971  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
972   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
973 
974  DebugLoc DL;
975 
976  using RegAndKill = std::pair<unsigned, bool>;
977 
978   SmallVector<RegAndKill, 4> Regs;
979   unsigned i = CSI.size();
980  while (i != 0) {
981  unsigned LastReg = 0;
982  for (; i != 0; --i) {
983  unsigned Reg = CSI[i-1].getReg();
984  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
985 
986  // D-registers in the aligned area DPRCS2 are NOT spilled here.
987  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
988  continue;
989 
990  const MachineRegisterInfo &MRI = MF.getRegInfo();
991  bool isLiveIn = MRI.isLiveIn(Reg);
992  if (!isLiveIn && !MRI.isReserved(Reg))
993  MBB.addLiveIn(Reg);
994  // If NoGap is true, push consecutive registers and then leave the rest
995  // for other instructions. e.g.
996  // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
997  if (NoGap && LastReg && LastReg != Reg-1)
998  break;
999  LastReg = Reg;
1000  // Do not set a kill flag on values that are also marked as live-in. This
1001   // happens with the @llvm.returnaddress intrinsic and with arguments
1002  // passed in callee saved registers.
1003  // Omitting the kill flags is conservatively correct even if the live-in
1004  // is not used after all.
1005  Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1006  }
1007 
1008  if (Regs.empty())
1009  continue;
1010 
1011  llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1012  return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1013  });
1014 
1015  if (Regs.size() > 1 || StrOpc== 0) {
1016  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1017  .addReg(ARM::SP)
1018  .setMIFlags(MIFlags)
1019  .add(predOps(ARMCC::AL));
1020  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1021  MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1022  } else if (Regs.size() == 1) {
1023  BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1024  .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1025  .addReg(ARM::SP)
1026  .setMIFlags(MIFlags)
1027  .addImm(-4)
1028  .add(predOps(ARMCC::AL));
1029  }
1030  Regs.clear();
1031 
1032  // Put any subsequent vpush instructions before this one: they will refer to
1033  // higher register numbers so need to be pushed first in order to preserve
1034  // monotonicity.
1035  if (MI != MBB.begin())
1036  --MI;
1037  }
1038 }
1039 
1040 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1041                                    MachineBasicBlock::iterator MI,
1042                                    std::vector<CalleeSavedInfo> &CSI,
1043  unsigned LdmOpc, unsigned LdrOpc,
1044  bool isVarArg, bool NoGap,
1045  bool(*Func)(unsigned, bool),
1046  unsigned NumAlignedDPRCS2Regs) const {
1047  MachineFunction &MF = *MBB.getParent();
1048  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1049   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1050   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1051   DebugLoc DL;
1052  bool isTailCall = false;
1053  bool isInterrupt = false;
1054  bool isTrap = false;
1055  if (MBB.end() != MI) {
1056  DL = MI->getDebugLoc();
1057  unsigned RetOpcode = MI->getOpcode();
1058  isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1059  isInterrupt =
1060  RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1061  isTrap =
1062  RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1063  RetOpcode == ARM::tTRAP;
1064  }
1065 
1066   SmallVector<unsigned, 4> Regs;
1067   unsigned i = CSI.size();
1068  while (i != 0) {
1069  unsigned LastReg = 0;
1070  bool DeleteRet = false;
1071  for (; i != 0; --i) {
1072  CalleeSavedInfo &Info = CSI[i-1];
1073  unsigned Reg = Info.getReg();
1074  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1075 
1076  // The aligned reloads from area DPRCS2 are not inserted here.
1077  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1078  continue;
1079 
1080  if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1081  !isTrap && STI.hasV5TOps()) {
1082  if (MBB.succ_empty()) {
1083  Reg = ARM::PC;
1084  // Fold the return instruction into the LDM.
1085  DeleteRet = true;
1086  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1087  // We 'restore' LR into PC so it is not live out of the return block:
1088  // Clear Restored bit.
1089  Info.setRestored(false);
1090  } else
1091  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1092  }
1093 
1094  // If NoGap is true, pop consecutive registers and then leave the rest
1095  // for other instructions. e.g.
1096  // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1097  if (NoGap && LastReg && LastReg != Reg-1)
1098  break;
1099 
1100  LastReg = Reg;
1101  Regs.push_back(Reg);
1102  }
1103 
1104  if (Regs.empty())
1105  continue;
1106 
1107  llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1108  return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1109  });
1110 
1111  if (Regs.size() > 1 || LdrOpc == 0) {
1112  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1113  .addReg(ARM::SP)
1114  .add(predOps(ARMCC::AL));
1115  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1116  MIB.addReg(Regs[i], getDefRegState(true));
1117  if (DeleteRet) {
1118  if (MI != MBB.end()) {
1119  MIB.copyImplicitOps(*MI);
1120  MI->eraseFromParent();
1121  }
1122  }
1123  MI = MIB;
1124  } else if (Regs.size() == 1) {
1125  // If we adjusted the reg to PC from LR above, switch it back here. We
1126  // only do that for LDM.
1127  if (Regs[0] == ARM::PC)
1128  Regs[0] = ARM::LR;
1129  MachineInstrBuilder MIB =
1130  BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1131  .addReg(ARM::SP, RegState::Define)
1132  .addReg(ARM::SP);
1133  // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1134  // that refactoring is complete (eventually).
1135  if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1136  MIB.addReg(0);
1137         MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1138       } else
1139  MIB.addImm(4);
1140  MIB.add(predOps(ARMCC::AL));
1141  }
1142  Regs.clear();
1143 
1144  // Put any subsequent vpop instructions after this one: they will refer to
1145  // higher register numbers so need to be popped afterwards.
1146  if (MI != MBB.end())
1147  ++MI;
1148  }
1149 }
1150 
1151 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1152 /// starting from d8. Also insert stack realignment code and leave the stack
1153 /// pointer pointing to the d8 spill slot.
1154 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1155                                     MachineBasicBlock::iterator MI,
1156                                     unsigned NumAlignedDPRCS2Regs,
1157  const std::vector<CalleeSavedInfo> &CSI,
1158  const TargetRegisterInfo *TRI) {
1159  MachineFunction &MF = *MBB.getParent();
1160   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1161   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1162  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1163  MachineFrameInfo &MFI = MF.getFrameInfo();
1164 
1165  // Mark the D-register spill slots as properly aligned. Since MFI computes
1166  // stack slot layout backwards, this can actually mean that the d-reg stack
1167  // slot offsets can be wrong. The offset for d8 will always be correct.
1168  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1169  unsigned DNum = CSI[i].getReg() - ARM::D8;
1170  if (DNum > NumAlignedDPRCS2Regs - 1)
1171  continue;
1172  int FI = CSI[i].getFrameIdx();
1173  // The even-numbered registers will be 16-byte aligned, the odd-numbered
1174  // registers will be 8-byte aligned.
1175  MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
1176 
1177  // The stack slot for D8 needs to be maximally aligned because this is
1178  // actually the point where we align the stack pointer. MachineFrameInfo
1179  // computes all offsets relative to the incoming stack pointer which is a
1180  // bit weird when realigning the stack. Any extra padding for this
1181  // over-alignment is not realized because the code inserted below adjusts
1182  // the stack pointer by numregs * 8 before aligning the stack pointer.
1183  if (DNum == 0)
1184  MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
1185  }
1186 
1187  // Move the stack pointer to the d8 spill slot, and align it at the same
1188  // time. Leave the stack slot address in the scratch register r4.
1189  //
1190  // sub r4, sp, #numregs * 8
1191  // bic r4, r4, #align - 1
1192  // mov sp, r4
1193  //
1194  bool isThumb = AFI->isThumbFunction();
1195  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1196  AFI->setShouldRestoreSPFromFP(true);
1197 
1198  // sub r4, sp, #numregs * 8
1199  // The immediate is <= 64, so it doesn't need any special encoding.
1200  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1201  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1202  .addReg(ARM::SP)
1203  .addImm(8 * NumAlignedDPRCS2Regs)
1204  .add(predOps(ARMCC::AL))
1205  .add(condCodeOp());
1206 
1207  unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
1208  // We must set parameter MustBeSingleInstruction to true, since
1209  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1210  // stack alignment. Luckily, this can always be done since all ARM
1211  // architecture versions that support Neon also support the BFC
1212  // instruction.
1213  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1214 
1215  // mov sp, r4
1216  // The stack pointer must be adjusted before spilling anything, otherwise
1217  // the stack slots could be clobbered by an interrupt handler.
1218  // Leave r4 live, it is used below.
1219  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1220  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1221  .addReg(ARM::R4)
1222  .add(predOps(ARMCC::AL));
1223  if (!isThumb)
1224  MIB.add(condCodeOp());
1225 
1226  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1227  // r4 holds the stack slot address.
1228  unsigned NextReg = ARM::D8;
1229 
1230  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1231  // The writeback is only needed when emitting two vst1.64 instructions.
1232  if (NumAlignedDPRCS2Regs >= 6) {
1233  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1234  &ARM::QQPRRegClass);
1235  MBB.addLiveIn(SupReg);
1236  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1237  .addReg(ARM::R4, RegState::Kill)
1238  .addImm(16)
1239  .addReg(NextReg)
1240  .addReg(SupReg, RegState::ImplicitKill)
1241  .add(predOps(ARMCC::AL));
1242  NextReg += 4;
1243  NumAlignedDPRCS2Regs -= 4;
1244  }
1245 
1246  // We won't modify r4 beyond this point. It currently points to the next
1247  // register to be spilled.
1248  unsigned R4BaseReg = NextReg;
1249 
1250  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1251  if (NumAlignedDPRCS2Regs >= 4) {
1252  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1253  &ARM::QQPRRegClass);
1254  MBB.addLiveIn(SupReg);
1255  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1256  .addReg(ARM::R4)
1257  .addImm(16)
1258  .addReg(NextReg)
1259  .addReg(SupReg, RegState::ImplicitKill)
1260  .add(predOps(ARMCC::AL));
1261  NextReg += 4;
1262  NumAlignedDPRCS2Regs -= 4;
1263  }
1264 
1265  // 16-byte aligned vst1.64 with 2 d-regs.
1266  if (NumAlignedDPRCS2Regs >= 2) {
1267  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1268  &ARM::QPRRegClass);
1269  MBB.addLiveIn(SupReg);
1270  BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1271  .addReg(ARM::R4)
1272  .addImm(16)
1273  .addReg(SupReg)
1274  .add(predOps(ARMCC::AL));
1275  NextReg += 2;
1276  NumAlignedDPRCS2Regs -= 2;
1277  }
1278 
1279  // Finally, use a vanilla vstr.64 for the odd last register.
1280  if (NumAlignedDPRCS2Regs) {
1281  MBB.addLiveIn(NextReg);
1282  // vstr.64 uses addrmode5 which has an offset scale of 4.
1283  BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1284  .addReg(NextReg)
1285  .addReg(ARM::R4)
1286  .addImm((NextReg - R4BaseReg) * 2)
1287  .add(predOps(ARMCC::AL));
1288  }
1289 
1290  // The last spill instruction inserted should kill the scratch register r4.
1291  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1292 }
1293 
1294 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1295 /// iterator to the following instruction.
1296 static MachineBasicBlock::iterator
1297 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1298                         unsigned NumAlignedDPRCS2Regs) {
1299  // sub r4, sp, #numregs * 8
1300  // bic r4, r4, #align - 1
1301  // mov sp, r4
1302  ++MI; ++MI; ++MI;
1303  assert(MI->mayStore() && "Expecting spill instruction");
1304 
1305  // These switches all fall through.
1306  switch(NumAlignedDPRCS2Regs) {
1307  case 7:
1308  ++MI;
1309  assert(MI->mayStore() && "Expecting spill instruction");
1310     LLVM_FALLTHROUGH;
1311   default:
1312  ++MI;
1313  assert(MI->mayStore() && "Expecting spill instruction");
1314     LLVM_FALLTHROUGH;
1315   case 1:
1316  case 2:
1317  case 4:
1318  assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1319  ++MI;
1320  }
1321  return MI;
1322 }
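// For example, with NumAlignedDPRCS2Regs == 6 the spill code consists of the
// sub/bic/mov realignment preamble plus two vst1.64 stores (4 + 2 D-registers),
// so this function advances the iterator past five instructions in total.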
1323 
1324 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1325 /// starting from d8. These instructions are assumed to execute while the
1326 /// stack is still aligned, unlike the code inserted by emitPopInst.
1327 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1328                                       MachineBasicBlock::iterator MI,
1329                                       unsigned NumAlignedDPRCS2Regs,
1330  const std::vector<CalleeSavedInfo> &CSI,
1331  const TargetRegisterInfo *TRI) {
1332  MachineFunction &MF = *MBB.getParent();
1333   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1334   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1335  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1336 
1337  // Find the frame index assigned to d8.
1338  int D8SpillFI = 0;
1339  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1340  if (CSI[i].getReg() == ARM::D8) {
1341  D8SpillFI = CSI[i].getFrameIdx();
1342  break;
1343  }
1344 
1345  // Materialize the address of the d8 spill slot into the scratch register r4.
1346  // This can be fairly complicated if the stack frame is large, so just use
1347  // the normal frame index elimination mechanism to do it. This code runs as
1348  // the initial part of the epilog where the stack and base pointers haven't
1349  // been changed yet.
1350  bool isThumb = AFI->isThumbFunction();
1351  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1352 
1353  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1354  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1355  .addFrameIndex(D8SpillFI)
1356  .addImm(0)
1357  .add(predOps(ARMCC::AL))
1358  .add(condCodeOp());
1359 
1360  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1361  unsigned NextReg = ARM::D8;
1362 
1363  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1364  if (NumAlignedDPRCS2Regs >= 6) {
1365  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1366  &ARM::QQPRRegClass);
1367  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1368         .addReg(ARM::R4, RegState::Define)
1369         .addReg(ARM::R4, RegState::Kill)
1370         .addImm(16)
1371         .addReg(SupReg, RegState::ImplicitDefine)
1372         .add(predOps(ARMCC::AL));
1373  NextReg += 4;
1374  NumAlignedDPRCS2Regs -= 4;
1375  }
1376 
1377  // We won't modify r4 beyond this point. It currently points to the next
1378  // register to be spilled.
1379  unsigned R4BaseReg = NextReg;
1380 
1381  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1382  if (NumAlignedDPRCS2Regs >= 4) {
1383  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1384  &ARM::QQPRRegClass);
1385  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1386  .addReg(ARM::R4)
1387  .addImm(16)
1388         .addReg(SupReg, RegState::ImplicitDefine)
1389         .add(predOps(ARMCC::AL));
1390  NextReg += 4;
1391  NumAlignedDPRCS2Regs -= 4;
1392  }
1393 
1394  // 16-byte aligned vld1.64 with 2 d-regs.
1395  if (NumAlignedDPRCS2Regs >= 2) {
1396  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1397  &ARM::QPRRegClass);
1398  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1399  .addReg(ARM::R4)
1400  .addImm(16)
1401  .add(predOps(ARMCC::AL));
1402  NextReg += 2;
1403  NumAlignedDPRCS2Regs -= 2;
1404  }
1405 
1406  // Finally, use a vanilla vldr.64 for the remaining odd register.
1407  if (NumAlignedDPRCS2Regs)
1408  BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1409  .addReg(ARM::R4)
1410  .addImm(2 * (NextReg - R4BaseReg))
1411  .add(predOps(ARMCC::AL));
1412 
1413   // The last reload kills r4.
1414  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1415 }
1416 
1417 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
1418                                         MachineBasicBlock::iterator MI,
1419                                         const std::vector<CalleeSavedInfo> &CSI,
1420  const TargetRegisterInfo *TRI) const {
1421  if (CSI.empty())
1422  return false;
1423 
1424  MachineFunction &MF = *MBB.getParent();
1425   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1426 
1427  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1428  unsigned PushOneOpc = AFI->isThumbFunction() ?
1429  ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1430  unsigned FltOpc = ARM::VSTMDDB_UPD;
1431  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1432   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1433                MachineInstr::FrameSetup);
1434   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1435                MachineInstr::FrameSetup);
1436   emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1437  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1438 
1439  // The code above does not insert spill code for the aligned DPRCS2 registers.
1440  // The stack realignment code will be inserted between the push instructions
1441  // and these spills.
1442  if (NumAlignedDPRCS2Regs)
1443  emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1444 
1445  return true;
1446 }
1447 
1448 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1449                                         MachineBasicBlock::iterator MI,
1450                                         std::vector<CalleeSavedInfo> &CSI,
1451  const TargetRegisterInfo *TRI) const {
1452  if (CSI.empty())
1453  return false;
1454 
1455  MachineFunction &MF = *MBB.getParent();
1456   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1457   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1458  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1459 
1460  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1461  // registers. Do that here instead.
1462  if (NumAlignedDPRCS2Regs)
1463  emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1464 
1465  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1466  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1467  unsigned FltOpc = ARM::VLDMDIA_UPD;
1468  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1469  NumAlignedDPRCS2Regs);
1470  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1471  &isARMArea2Register, 0);
1472  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1473  &isARMArea1Register, 0);
1474 
1475  return true;
1476 }
1477 
1478 // FIXME: Make generic?
1479 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
1480  const ARMBaseInstrInfo &TII) {
1481  unsigned FnSize = 0;
1482  for (auto &MBB : MF) {
1483  for (auto &MI : MBB)
1484  FnSize += TII.getInstSizeInBytes(MI);
1485  }
1486  return FnSize;
1487 }
1488 
1489 /// estimateRSStackSizeLimit - Look at each instruction that references stack
1490 /// frames and return the stack size limit beyond which some of these
1491 /// instructions will require a scratch register during their expansion later.
1492 // FIXME: Move to TII?
1493 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
1494                                          const TargetFrameLowering *TFI) {
1495  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1496  unsigned Limit = (1 << 12) - 1;
1497  for (auto &MBB : MF) {
1498  for (auto &MI : MBB) {
1499  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1500  if (!MI.getOperand(i).isFI())
1501  continue;
1502 
1503  // When using ADDri to get the address of a stack object, 255 is the
1504  // largest offset guaranteed to fit in the immediate offset.
1505  if (MI.getOpcode() == ARM::ADDri) {
1506  Limit = std::min(Limit, (1U << 8) - 1);
1507  break;
1508  }
1509 
1510  // Otherwise check the addressing mode.
1511  switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
1512  case ARMII::AddrMode3:
1513  case ARMII::AddrModeT2_i8:
1514  Limit = std::min(Limit, (1U << 8) - 1);
1515  break;
1516  case ARMII::AddrMode5:
1519  Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1520  break;
1521  case ARMII::AddrModeT2_i12:
1522  // i12 supports only positive offset so these will be converted to
1523  // i8 opcodes. See llvm::rewriteT2FrameIndex.
1524  if (TFI->hasFP(MF) && AFI->hasStackFrame())
1525  Limit = std::min(Limit, (1U << 8) - 1);
1526  break;
1527  case ARMII::AddrMode4:
1528  case ARMII::AddrMode6:
1529  // Addressing modes 4 & 6 (load/store) instructions can't encode an
1530  // immediate offset for stack references.
1531  return 0;
1532  default:
1533  break;
1534  }
1535  break; // At most one FI per instruction
1536  }
1537  }
1538  }
1539 
1540  return Limit;
1541 }
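// For example, a function whose stack references all use the 8-bit Thumb2
// immediate form (AddrModeT2_i8) gets a limit of 255 bytes, while a single
// ldm/stm or NEON vld1/vst1 against a stack slot (AddrMode4 / AddrMode6) drops
// the limit to 0 because those forms cannot encode any immediate offset.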
1542 
1543 // In functions that realign the stack, it can be an advantage to spill the
1544 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1545 // instructions take alignment hints that can improve performance.
1546 static void
1547 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
1548   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1549  if (!SpillAlignedNEONRegs)
1550  return;
1551 
1552  // Naked functions don't spill callee-saved registers.
1553  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
1554  return;
1555 
1556  // We are planning to use NEON instructions vst1 / vld1.
1557  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
1558  return;
1559 
1560  // Don't bother if the default stack alignment is sufficiently high.
1561  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
1562  return;
1563 
1564  // Aligned spills require stack realignment.
1565  if (!static_cast<const ARMBaseRegisterInfo *>(
1566  MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
1567  return;
1568 
1569  // We always spill contiguous d-registers starting from d8. Count how many
1570   // need spilling. The register allocator will almost always use the
1571  // callee-saved registers in order, but it can happen that there are holes in
1572  // the range. Registers above the hole will be spilled to the standard DPRCS
1573  // area.
1574  unsigned NumSpills = 0;
1575  for (; NumSpills < 8; ++NumSpills)
1576  if (!SavedRegs.test(ARM::D8 + NumSpills))
1577  break;
1578 
1579  // Don't do this for just one d-register. It's not worth it.
1580  if (NumSpills < 2)
1581  return;
1582 
1583  // Spill the first NumSpills D-registers after realigning the stack.
1584  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1585 
1586  // A scratch register is required for the vst1 / vld1 instructions.
1587  SavedRegs.set(ARM::R4);
1588 }
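// For example, if d8-d13 are the callee-saved D-registers being spilled and
// the stack can be realigned, NumSpills ends up as 6: d8-d13 move to the
// aligned DPRCS2 area (stored with vst1.64 after realignment) and r4 is added
// to SavedRegs as the scratch register those stores and reloads need.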
1589 
1590 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
1591                                             BitVector &SavedRegs,
1592  RegScavenger *RS) const {
1593  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1594  // This tells PEI to spill the FP as if it is any other callee-save register
1595   // to take advantage of the eliminateFrameIndex machinery. This also ensures
1596   // it
1596  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1597  // to combine multiple loads / stores.
1598  bool CanEliminateFrame = true;
1599  bool CS1Spilled = false;
1600  bool LRSpilled = false;
1601  unsigned NumGPRSpills = 0;
1602  unsigned NumFPRSpills = 0;
1603  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1604  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1605  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1606  MF.getSubtarget().getRegisterInfo());
1607  const ARMBaseInstrInfo &TII =
1608       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1609   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1610   MachineFrameInfo &MFI = MF.getFrameInfo();
1611   MachineRegisterInfo &MRI = MF.getRegInfo();
1612   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1613   (void)TRI;  // Silence unused warning in non-assert builds.
1614  unsigned FramePtr = RegInfo->getFrameRegister(MF);
1615 
1616  // Spill R4 if a Thumb2 function requires stack realignment - it will be used
1617  // as a scratch register. Also spill R4 if a Thumb2 function has var-sized
1618  // objects, since it's not always possible to restore sp from fp in a single
1619  // instruction.
1620  // FIXME: It would be better just to find a spare register here.
1621  if (AFI->isThumb2Function() &&
1622  (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1623  SavedRegs.set(ARM::R4);
1624 
1625  // If a stack probe will be emitted, spill R4 and LR, since they are
1626  // clobbered by the stack probe call.
1627  // This estimate should be a safe, conservative estimate. The actual
1628  // stack probe is enabled based on the size of the local objects;
1629  // this estimate also includes the varargs store size.
1630  if (STI.isTargetWindows() &&
1631  WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
1632  SavedRegs.set(ARM::R4);
1633  SavedRegs.set(ARM::LR);
1634  }
1635 
1636  if (AFI->isThumb1OnlyFunction()) {
1637  // Spill LR if Thumb1 function uses variable length argument lists.
1638  if (AFI->getArgRegsSaveSize() > 0)
1639  SavedRegs.set(ARM::LR);
1640 
1641  // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
1642  // requires stack alignment. We don't know for sure what the stack size
1643  // will be, but for this an estimate is good enough. If anything changes it,
1644  // it'll be a spill, which implies we've used all the registers and so R4 is
1645  // already used, so not marking it here will be OK.
1646  // FIXME: It would be better just to find a spare register here.
1647  if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
1648  MFI.estimateStackSize(MF) > 508)
1649  SavedRegs.set(ARM::R4);
1650  }
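// Editorial note (assumption, not from the original source): the 508-byte
// threshold matches the largest offset a single Thumb1 SP add/sub can encode
// (a 7-bit immediate scaled by 4), so anything larger needs R4 as a scratch
// register when the epilogue rebuilds SP.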
1651 
1652  // See if we can spill vector registers to aligned stack.
1653  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
1654 
1655  // Spill the BasePtr if it's used.
1656  if (RegInfo->hasBasePointer(MF))
1657  SavedRegs.set(RegInfo->getBaseRegister());
1658 
1659  // Don't spill FP if the frame can be eliminated. This is determined
1660  // by scanning the callee-save registers to see if any is modified.
1661  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1662  for (unsigned i = 0; CSRegs[i]; ++i) {
1663  unsigned Reg = CSRegs[i];
1664  bool Spilled = false;
1665  if (SavedRegs.test(Reg)) {
1666  Spilled = true;
1667  CanEliminateFrame = false;
1668  }
1669 
1670  if (!ARM::GPRRegClass.contains(Reg)) {
1671  if (Spilled) {
1672  if (ARM::SPRRegClass.contains(Reg))
1673  NumFPRSpills++;
1674  else if (ARM::DPRRegClass.contains(Reg))
1675  NumFPRSpills += 2;
1676  else if (ARM::QPRRegClass.contains(Reg))
1677  NumFPRSpills += 4;
1678  }
1679  continue;
1680  }
1681 
1682  if (Spilled) {
1683  NumGPRSpills++;
1684 
1685  if (!STI.splitFramePushPop(MF)) {
1686  if (Reg == ARM::LR)
1687  LRSpilled = true;
1688  CS1Spilled = true;
1689  continue;
1690  }
1691 
1692  // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
1693  switch (Reg) {
1694  case ARM::LR:
1695  LRSpilled = true;
1696  LLVM_FALLTHROUGH;
1697  case ARM::R0: case ARM::R1:
1698  case ARM::R2: case ARM::R3:
1699  case ARM::R4: case ARM::R5:
1700  case ARM::R6: case ARM::R7:
1701  CS1Spilled = true;
1702  break;
1703  default:
1704  break;
1705  }
1706  } else {
1707  if (!STI.splitFramePushPop(MF)) {
1708  UnspilledCS1GPRs.push_back(Reg);
1709  continue;
1710  }
1711 
1712  switch (Reg) {
1713  case ARM::R0: case ARM::R1:
1714  case ARM::R2: case ARM::R3:
1715  case ARM::R4: case ARM::R5:
1716  case ARM::R6: case ARM::R7:
1717  case ARM::LR:
1718  UnspilledCS1GPRs.push_back(Reg);
1719  break;
1720  default:
1721  UnspilledCS2GPRs.push_back(Reg);
1722  break;
1723  }
1724  }
1725  }
1726 
1727  bool ForceLRSpill = false;
1728  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1729  unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
1730  // Force LR to be spilled if the Thumb function size is >= 2048 bytes. This
1731  // enables the use of BL to implement a far jump. If it turns out that it's
1732  // not needed then the branch fix-up path will undo it.
1733  if (FnSize >= (1 << 11)) {
1734  CanEliminateFrame = false;
1735  ForceLRSpill = true;
1736  }
1737  }
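// Illustrative note (editorial, not from the original source): a Thumb1-only
// function estimated at, say, 4 KiB crosses the 2 KiB threshold above, so LR
// is force-spilled here; if the later branch fix-up pass finds that no far
// jump is actually needed, it undoes the spill.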
1738 
1739  // If any of the stack slot references may be out of range of an immediate
1740  // offset, make sure a register (or a spill slot) is available for the
1741  // register scavenger. Note that if we're indexing off the frame pointer, the
1742  // effective stack size is 4 bytes larger since the FP points to the stack
1743  // slot of the previous FP. Also, if we have variable sized objects in the
1744  // function, stack slot references will often be negative, and some of
1745  // our instructions are positive-offset only, so conservatively consider
1746  // that case to want a spill slot (or register) as well. Similarly, if
1747  // the function adjusts the stack pointer during execution and the
1748  // adjustments aren't already part of our stack size estimate, our offset
1749  // calculations may be off, so be conservative.
1750  // FIXME: We could add logic to be more precise about negative offsets
1751  // and which instructions will need a scratch register for them. Is it
1752  // worth the effort and added fragility?
1753  unsigned EstimatedStackSize =
1754  MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
1755 
1756  // Determine biggest (positive) SP offset in MachineFrameInfo.
1757  int MaxFixedOffset = 0;
1758  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
1759  int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
1760  MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
1761  }
1762 
1763  bool HasFP = hasFP(MF);
1764  if (HasFP) {
1765  if (AFI->hasStackFrame())
1766  EstimatedStackSize += 4;
1767  } else {
1768  // If FP is not used, SP will be used to access arguments, so count the
1769  // size of arguments into the estimation.
1770  EstimatedStackSize += MaxFixedOffset;
1771  }
1772  EstimatedStackSize += 16; // For possible paddings.
1773 
1774  unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
1775  int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
1776  bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
1777  MFI.hasVarSizedObjects() ||
1778  (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
1779  // For large argument stacks, fp-relative addressing may overflow.
1780  (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
1781  if (BigFrameOffsets ||
1782  !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1783  AFI->setHasStackFrame(true);
1784 
1785  if (HasFP) {
1786  SavedRegs.set(FramePtr);
1787  // If the frame pointer is required by the ABI, also spill LR so that we
1788  // emit a complete frame record.
1789  if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
1790  SavedRegs.set(ARM::LR);
1791  LRSpilled = true;
1792  NumGPRSpills++;
1793  auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
1794  if (LRPos != UnspilledCS1GPRs.end())
1795  UnspilledCS1GPRs.erase(LRPos);
1796  }
1797  auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
1798  if (FPPos != UnspilledCS1GPRs.end())
1799  UnspilledCS1GPRs.erase(FPPos);
1800  NumGPRSpills++;
1801  if (FramePtr == ARM::R7)
1802  CS1Spilled = true;
1803  }
1804 
1805  // This is true when we inserted a spill for an unused register that can now
1806  // be used for register scavenging.
1807  bool ExtraCSSpill = false;
1808 
1809  if (AFI->isThumb1OnlyFunction()) {
1810  // For Thumb1-only targets, we need some low registers when we save and
1811  // restore the high registers (which aren't allocatable, but could be
1812  // used by inline assembly) because the push/pop instructions cannot
1813  // access high registers. If necessary, we might need to push more low
1814  // registers to ensure that there is at least one free that can be used
1815  // for the saving & restoring, and preferably we should ensure that as
1816  // many as are needed are available so that fewer push/pop instructions
1817  // are required.
1818 
1819  // Low registers which are not currently pushed, but could be (r4-r7).
1820  SmallVector<unsigned, 4> AvailableRegs;
1821 
1822  // Unused argument registers (r0-r3) can be clobbered in the prologue for
1823  // free.
1824  int EntryRegDeficit = 0;
1825  for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
1826  if (!MF.getRegInfo().isLiveIn(Reg)) {
1827  --EntryRegDeficit;
1828  LLVM_DEBUG(dbgs()
1829  << printReg(Reg, TRI)
1830  << " is unused argument register, EntryRegDeficit = "
1831  << EntryRegDeficit << "\n");
1832  }
1833  }
1834 
1835  // Unused return registers can be clobbered in the epilogue for free.
1836  int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
1837  LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
1838  << " return regs used, ExitRegDeficit = "
1839  << ExitRegDeficit << "\n");
1840 
1841  int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
1842  LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
1843 
1844  // r4-r6 can be used in the prologue if they are pushed by the first push
1845  // instruction.
1846  for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
1847  if (SavedRegs.test(Reg)) {
1848  --RegDeficit;
1849  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1850  << " is saved low register, RegDeficit = "
1851  << RegDeficit << "\n");
1852  } else {
1853  AvailableRegs.push_back(Reg);
1854  LLVM_DEBUG(
1855  dbgs()
1856  << printReg(Reg, TRI)
1857  << " is non-saved low register, adding to AvailableRegs\n");
1858  }
1859  }
1860 
1861  // r7 can be used if it is not being used as the frame pointer.
1862  if (!HasFP) {
1863  if (SavedRegs.test(ARM::R7)) {
1864  --RegDeficit;
1865  LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
1866  << RegDeficit << "\n");
1867  } else {
1868  AvailableRegs.push_back(ARM::R7);
1869  LLVM_DEBUG(
1870  dbgs()
1871  << "%r7 is non-saved low register, adding to AvailableRegs\n");
1872  }
1873  }
1874 
1875  // Each of r8-r11 needs to be copied to a low register, then pushed.
1876  for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
1877  if (SavedRegs.test(Reg)) {
1878  ++RegDeficit;
1879  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1880  << " is saved high register, RegDeficit = "
1881  << RegDeficit << "\n");
1882  }
1883  }
1884 
1885  // LR can only be used by PUSH, not POP, and can't be used at all if the
1886  // llvm.returnaddress intrinsic is used. This is only worth doing if we
1887  // are more limited at function entry than exit.
1888  if ((EntryRegDeficit > ExitRegDeficit) &&
1889  !(MF.getRegInfo().isLiveIn(ARM::LR) &&
1890  MF.getFrameInfo().isReturnAddressTaken())) {
1891  if (SavedRegs.test(ARM::LR)) {
1892  --RegDeficit;
1893  LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
1894  << RegDeficit << "\n");
1895  } else {
1896  AvailableRegs.push_back(ARM::LR);
1897  LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
1898  }
1899  }
1900 
1901  // If there are more high registers that need pushing than low registers
1902  // available, push some more low registers so that we can use fewer push
1903  // instructions. This might not reduce RegDeficit all the way to zero,
1904  // because we can only guarantee that r4-r6 are available, but r8-r11 may
1905  // need saving.
1906  LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
1907  for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
1908  unsigned Reg = AvailableRegs.pop_back_val();
1909  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1910  << " to make up reg deficit\n");
1911  SavedRegs.set(Reg);
1912  NumGPRSpills++;
1913  CS1Spilled = true;
1914  assert(!MRI.isReserved(Reg) && "Should not be reserved");
1915  if (!MRI.isPhysRegUsed(Reg))
1916  ExtraCSSpill = true;
1917  UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
1918  if (Reg == ARM::LR)
1919  LRSpilled = true;
1920  }
1921  LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
1922  << "\n");
1923  }
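// Worked example (editorial, not from the original source): a Thumb1 function
// whose four argument registers are all live-in (EntryRegDeficit = 0) and
// which must save r8-r11 reaches RegDeficit = 4 above; the loop then spills
// otherwise-unneeded entries from AvailableRegs (r4-r7, possibly LR) so the
// high registers can be copied through low registers with fewer push/pop
// instructions.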
1924 
1925  // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
1926  // restore LR in that case.
1927  bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
1928 
1929  // If LR is not spilled, but at least one of R4, R5, R6, and R7 is, spill LR
1930  // as well so we can fold BX_RET into the register restore (LDM).
1931  if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
1932  SavedRegs.set(ARM::LR);
1933  NumGPRSpills++;
1934  SmallVectorImpl<unsigned>::iterator LRPos;
1935  LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
1936  if (LRPos != UnspilledCS1GPRs.end())
1937  UnspilledCS1GPRs.erase(LRPos);
1938 
1939  ForceLRSpill = false;
1940  if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR))
1941  ExtraCSSpill = true;
1942  }
1943 
1944  // If stack and double are 8-byte aligned and we are spilling an odd number
1945  // of GPRs, spill one extra callee save GPR so we won't have to pad between
1946  // the integer and double callee save areas.
1947  LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
1948  unsigned TargetAlign = getStackAlignment();
1949  if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
1950  if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
1951  for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
1952  unsigned Reg = UnspilledCS1GPRs[i];
1953  // Don't spill a high register if the function is Thumb. In the case of
1954  // Windows on ARM, accept R11 (the frame pointer).
1955  if (!AFI->isThumbFunction() ||
1956  (STI.isTargetWindows() && Reg == ARM::R11) ||
1957  isARMLowRegister(Reg) ||
1958  (Reg == ARM::LR && !ExpensiveLRRestore)) {
1959  SavedRegs.set(Reg);
1960  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1961  << " to make up alignment\n");
1962  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1963  ExtraCSSpill = true;
1964  break;
1965  }
1966  }
1967  } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
1968  unsigned Reg = UnspilledCS2GPRs.front();
1969  SavedRegs.set(Reg);
1970  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1971  << " to make up alignment\n");
1972  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1973  ExtraCSSpill = true;
1974  }
1975  }
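// Illustrative note (editorial, not from the original source): with 8-byte
// stack alignment and an odd GPR spill count (say r4, r5 and lr), the code
// above spills one more area-1 register such as r6, keeping the GPR save area
// a multiple of 8 bytes so no padding is needed before the D-register saves.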
1976 
1977  // Estimate if we might need to scavenge a register at some point in order
1978  // to materialize a stack offset. If so, either spill one additional
1979  // callee-saved register or reserve a special spill slot to facilitate
1980  // register scavenging. Thumb1 needs a spill slot for stack pointer
1981  // adjustments also, even when the frame itself is small.
1982  if (BigFrameOffsets && !ExtraCSSpill) {
1983  // If any non-reserved CS register isn't spilled, just spill one or two
1984  // extra. That should take care of it!
1985  unsigned NumExtras = TargetAlign / 4;
1986  SmallVector<unsigned, 2> Extras;
1987  while (NumExtras && !UnspilledCS1GPRs.empty()) {
1988  unsigned Reg = UnspilledCS1GPRs.back();
1989  UnspilledCS1GPRs.pop_back();
1990  if (!MRI.isReserved(Reg) &&
1991  (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
1992  Reg == ARM::LR)) {
1993  Extras.push_back(Reg);
1994  NumExtras--;
1995  }
1996  }
1997  // For non-Thumb1 functions, also check for hi-reg CS registers
1998  if (!AFI->isThumb1OnlyFunction()) {
1999  while (NumExtras && !UnspilledCS2GPRs.empty()) {
2000  unsigned Reg = UnspilledCS2GPRs.back();
2001  UnspilledCS2GPRs.pop_back();
2002  if (!MRI.isReserved(Reg)) {
2003  Extras.push_back(Reg);
2004  NumExtras--;
2005  }
2006  }
2007  }
2008  if (NumExtras == 0) {
2009  for (unsigned Reg : Extras) {
2010  SavedRegs.set(Reg);
2011  if (!MRI.isPhysRegUsed(Reg))
2012  ExtraCSSpill = true;
2013  }
2014  }
2015  if (!ExtraCSSpill && !AFI->isThumb1OnlyFunction()) {
2016  // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
2017  // closest to SP or frame pointer.
2018  assert(RS && "Register scavenging not provided");
2019  const TargetRegisterClass &RC = ARM::GPRRegClass;
2020  unsigned Size = TRI->getSpillSize(RC);
2021  unsigned Align = TRI->getSpillAlignment(RC);
2022  RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2023  }
2024  }
2025  }
2026 
2027  if (ForceLRSpill) {
2028  SavedRegs.set(ARM::LR);
2029  AFI->setLRIsSpilledForFarJump(true);
2030  }
2031 }
2032 
2033 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2034  MachineFunction &MF, MachineBasicBlock &MBB,
2035  MachineBasicBlock::iterator I) const {
2036  const ARMBaseInstrInfo &TII =
2037  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2038  if (!hasReservedCallFrame(MF)) {
2039  // If we have alloca, convert as follows:
2040  // ADJCALLSTACKDOWN -> sub sp, sp, amount
2041  // ADJCALLSTACKUP -> add sp, sp, amount
2042  MachineInstr &Old = *I;
2043  DebugLoc dl = Old.getDebugLoc();
2044  unsigned Amount = TII.getFrameSize(Old);
2045  if (Amount != 0) {
2046  // We need to keep the stack aligned properly. To do this, we round the
2047  // amount of space needed for the outgoing arguments up to the next
2048  // alignment boundary.
2049  Amount = alignSPAdjust(Amount);
2050 
2051  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2052  assert(!AFI->isThumb1OnlyFunction() &&
2053  "This eliminateCallFramePseudoInstr does not support Thumb1!");
2054  bool isARM = !AFI->isThumbFunction();
2055 
2056  // Replace the pseudo instruction with a new instruction...
2057  unsigned Opc = Old.getOpcode();
2058  int PIdx = Old.findFirstPredOperandIdx();
2059  ARMCC::CondCodes Pred =
2060  (PIdx == -1) ? ARMCC::AL
2061  : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
2062  unsigned PredReg = TII.getFramePred(Old);
2063  if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2064  emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2065  Pred, PredReg);
2066  } else {
2067  assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2068  emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2069  Pred, PredReg);
2070  }
2071  }
2072  }
2073  return MBB.erase(I);
2074 }
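// Illustrative note (editorial, assuming 8-byte stack alignment): an
// ADJCALLSTACKDOWN of 20 bytes is rounded up by alignSPAdjust to 24 and
// lowered to "sub sp, sp, #24"; the matching ADJCALLSTACKUP becomes
// "add sp, sp, #24".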
2075 
2076 /// Get the minimum constant for ARM that is greater than or equal to the
2077 /// argument. In ARM, constants can have any value that can be produced by
2078 /// rotating an 8-bit value to the right by an even number of bits within a
2079 /// 32-bit word.
2080 static uint32_t alignToARMConstant(uint32_t Value) {
2081  unsigned Shifted = 0;
2082 
2083  if (Value == 0)
2084  return 0;
2085 
2086  while (!(Value & 0xC0000000)) {
2087  Value = Value << 2;
2088  Shifted += 2;
2089  }
2090 
2091  bool Carry = (Value & 0x00FFFFFF);
2092  Value = ((Value & 0xFF000000) >> 24) + Carry;
2093 
2094  if (Value & 0x0000100)
2095  Value = Value & 0x000001FC;
2096 
2097  if (Shifted > 24)
2098  Value = Value >> (Shifted - 24);
2099  else
2100  Value = Value << (24 - Shifted);
2101 
2102  return Value;
2103 }
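// Worked example (editorial, not part of the original source): for Value = 257
// (0x101) the loop shifts left by 22 to 0x40400000, the discarded low bits set
// Carry, giving (0x40 + 1) = 0x41, and re-aligning with a left shift of 2
// yields 0x104 = 260 -- the smallest ARM modified immediate (an 8-bit value
// rotated right by an even amount) that is >= 257.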
2104 
2105 // The stack limit in the TCB is set to this many bytes above the actual
2106 // stack limit.
2107 static const uint64_t kSplitStackAvailable = 256;
2108 
2109 // Adjust the function prologue to enable split stacks. This currently only
2110 // supports android and linux.
2111 //
2112 // The ABI of the segmented stack prologue is somewhat arbitrary, but it
2113 // must be well defined in order to allow for consistent implementations of the
2114 // __morestack helper function. The ABI is also not a normal ABI in that it
2115 // doesn't follow the normal calling conventions because this allows the
2116 // prologue of each function to be optimized further.
2117 //
2118 // Currently, the ABI looks like (when calling __morestack)
2119 //
2120 // * r4 holds the minimum stack size requested for this function call
2121 // * r5 holds the stack size of the arguments to the function
2122 // * the beginning of the function is 3 instructions after the call to
2123 // __morestack
2124 //
2125 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2126 // place the arguments on to the new stack, and the 3-instruction knowledge to
2127 // jump directly to the body of the function when working on the new stack.
2128 //
2129 // An old (and possibly no longer compatible) implementation of __morestack for
2130 // ARM can be found at [1].
2131 //
2132 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
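// Illustrative shape of the emitted prologue (editorial sketch; assumes ARM
// mode, a frame larger than kSplitStackAvailable, and the TLS-based limit;
// "AlignedStackSize" and "ArgumentStackSize" stand for the computed values):
//   push {r4, r5}                    ; save scratch registers
//   sub  r5, sp, #AlignedStackSize   ; SR1 = sp - frame size
//   mrc  p15, #0, r4, c13, c0, #3    ; SR0 = TLS base
//   ldr  r4, [r4, #4*TlsOffset]      ; SR0 = stack limit
//   cmp  r4, r5
//   blo  post                        ; enough stack, skip __morestack
//   mov  r4, #AlignedStackSize       ; arguments for __morestack
//   mov  r5, #ArgumentStackSize
//   push {lr}
//   bl   __morestack
//   pop  {lr}
//   pop  {r4, r5}
//   bx   lr
// post:
//   pop  {r4, r5}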
2133 void ARMFrameLowering::adjustForSegmentedStacks(
2134  MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2135  unsigned Opcode;
2136  unsigned CFIIndex;
2137  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2138  bool Thumb = ST->isThumb();
2139 
2140  // Sadly, this currently doesn't support varargs or platforms other than
2141  // android/linux. Note that Thumb1/Thumb2 are supported for android/linux.
2142  if (MF.getFunction().isVarArg())
2143  report_fatal_error("Segmented stacks do not support vararg functions.");
2144  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2145  report_fatal_error("Segmented stacks not supported on this platform.");
2146 
2147  MachineFrameInfo &MFI = MF.getFrameInfo();
2148  MachineModuleInfo &MMI = MF.getMMI();
2149  MCContext &Context = MMI.getContext();
2150  const MCRegisterInfo *MRI = Context.getRegisterInfo();
2151  const ARMBaseInstrInfo &TII =
2152  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2153  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
2154  DebugLoc DL;
2155 
2156  uint64_t StackSize = MFI.getStackSize();
2157 
2158  // Do not generate a prologue for leaf functions with a stack of size zero.
2159  // For non-leaf functions we have to allow for the possibility that the
2160  // call is to a non-split function, as in PR37807.
2161  if (StackSize == 0 && !MFI.hasTailCall())
2162  return;
2163 
2164  // Use R4 and R5 as scratch registers.
2165  // We save R4 and R5 before use and restore them before leaving the function.
2166  unsigned ScratchReg0 = ARM::R4;
2167  unsigned ScratchReg1 = ARM::R5;
2168  uint64_t AlignedStackSize;
2169 
2170  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2171  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2172  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
2173  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
2174  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
2175 
2176  // Grab everything that reaches PrologueMBB to update their liveness as well.
2177  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2178  SmallVector<MachineBasicBlock *, 8> WalkList;
2179  WalkList.push_back(&PrologueMBB);
2180 
2181  do {
2182  MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2183  for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2184  if (BeforePrologueRegion.insert(PredBB).second)
2185  WalkList.push_back(PredBB);
2186  }
2187  } while (!WalkList.empty());
2188 
2189  // The order in that list is important.
2190  // The blocks will all be inserted before PrologueMBB using that order.
2191  // Therefore the block that should appear first in the CFG should appear
2192  // first in the list.
2193  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2194  PostStackMBB};
2195 
2196  for (MachineBasicBlock *B : AddedBlocks)
2197  BeforePrologueRegion.insert(B);
2198 
2199  for (const auto &LI : PrologueMBB.liveins()) {
2200  for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2201  PredBB->addLiveIn(LI);
2202  }
2203 
2204  // Remove the newly added blocks from the list, since we know
2205  // we do not have to do the following updates for them.
2206  for (MachineBasicBlock *B : AddedBlocks) {
2207  BeforePrologueRegion.erase(B);
2208  MF.insert(PrologueMBB.getIterator(), B);
2209  }
2210 
2211  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2212  // Make sure the LiveIns are still sorted and unique.
2213  MBB->sortUniqueLiveIns();
2214  // Replace the edges to PrologueMBB by edges to the sequences
2215  // we are about to add.
2216  MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2217  }
2218 
2219  // The required stack size, aligned to the ARM modified-immediate constraint.
2220  AlignedStackSize = alignToARMConstant(StackSize);
2221 
2222  // When the frame size is less than 256 we just compare the stack
2223  // boundary directly to the value of the stack pointer, per gcc.
2224  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
2225 
2226  // We will use two of the callee save registers as scratch registers so we
2227  // need to save those registers onto the stack.
2228  // We will use SR0 to hold stack limit and SR1 to hold the stack size
2229  // requested and arguments for __morestack().
2230  // SR0: Scratch Register #0
2231  // SR1: Scratch Register #1
2232  // push {SR0, SR1}
2233  if (Thumb) {
2234  BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
2235  .add(predOps(ARMCC::AL))
2236  .addReg(ScratchReg0)
2237  .addReg(ScratchReg1);
2238  } else {
2239  BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
2240  .addReg(ARM::SP, RegState::Define)
2241  .addReg(ARM::SP)
2242  .add(predOps(ARMCC::AL))
2243  .addReg(ScratchReg0)
2244  .addReg(ScratchReg1);
2245  }
2246 
2247  // Emit the relevant DWARF information about the change in stack pointer as
2248  // well as where to find both r4 and r5 (the callee-save registers)
2249  CFIIndex =
2250  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
2251  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2252  .addCFIIndex(CFIIndex);
2253  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2254  nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
2255  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2256  .addCFIIndex(CFIIndex);
2257  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2258  nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
2259  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2260  .addCFIIndex(CFIIndex);
2261 
2262  // mov SR1, sp
2263  if (Thumb) {
2264  BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
2265  .addReg(ARM::SP)
2266  .add(predOps(ARMCC::AL));
2267  } else if (CompareStackPointer) {
2268  BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
2269  .addReg(ARM::SP)
2270  .add(predOps(ARMCC::AL))
2271  .add(condCodeOp());
2272  }
2273 
2274  // sub SR1, sp, #StackSize
2275  if (!CompareStackPointer && Thumb) {
2276  BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
2277  .add(condCodeOp())
2278  .addReg(ScratchReg1)
2279  .addImm(AlignedStackSize)
2280  .add(predOps(ARMCC::AL));
2281  } else if (!CompareStackPointer) {
2282  BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
2283  .addReg(ARM::SP)
2284  .addImm(AlignedStackSize)
2285  .add(predOps(ARMCC::AL))
2286  .add(condCodeOp());
2287  }
2288 
2289  if (Thumb && ST->isThumb1Only()) {
2290  unsigned PCLabelId = ARMFI->createPICLabelUId();
2291  ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
2292  MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
2293  MachineConstantPool *MCP = MF.getConstantPool();
2294  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
2295 
2296  // ldr SR0, [pc, offset(STACK_LIMIT)]
2297  BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
2298  .addConstantPoolIndex(CPI)
2299  .add(predOps(ARMCC::AL));
2300 
2301  // ldr SR0, [SR0]
2302  BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
2303  .addReg(ScratchReg0)
2304  .addImm(0)
2305  .add(predOps(ARMCC::AL));
2306  } else {
2307  // Get TLS base address from the coprocessor
2308  // mrc p15, #0, SR0, c13, c0, #3
2309  BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
2310  .addImm(15)
2311  .addImm(0)
2312  .addImm(13)
2313  .addImm(0)
2314  .addImm(3)
2315  .add(predOps(ARMCC::AL));
2316 
2317  // Use the last tls slot on android and a private field of the TCB on linux.
2318  assert(ST->isTargetAndroid() || ST->isTargetLinux());
2319  unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
2320 
2321  // Get the stack limit from the right offset
2322  // ldr SR0, [sr0, #4 * TlsOffset]
2323  BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
2324  .addReg(ScratchReg0)
2325  .addImm(4 * TlsOffset)
2326  .add(predOps(ARMCC::AL));
2327  }
2328 
2329  // Compare stack limit with stack size requested.
2330  // cmp SR0, SR1
2331  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
2332  BuildMI(GetMBB, DL, TII.get(Opcode))
2333  .addReg(ScratchReg0)
2334  .addReg(ScratchReg1)
2335  .add(predOps(ARMCC::AL));
2336 
2337  // This jump is taken if StackLimit < SP - stack required.
2338  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
2339  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
2340  .addImm(ARMCC::LO)
2341  .addReg(ARM::CPSR);
2342 
2343 
2344  // Calling __morestack(StackSize, Size of stack arguments).
2345  // __morestack knows that the stack size requested is in SR0 (r4)
2346  // and the size of the stack arguments is in SR1 (r5).
2347 
2348  // Pass the first argument to __morestack in Scratch Register #0:
2349  // the amount of stack required.
2350  if (Thumb) {
2351  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
2352  .add(condCodeOp())
2353  .addImm(AlignedStackSize)
2354  .add(predOps(ARMCC::AL));
2355  } else {
2356  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
2357  .addImm(AlignedStackSize)
2358  .add(predOps(ARMCC::AL))
2359  .add(condCodeOp());
2360  }
2361  // Pass the second argument to __morestack in Scratch Register #1:
2362  // the amount of stack consumed to save the function arguments.
2363  if (Thumb) {
2364  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
2365  .add(condCodeOp())
2366  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2367  .add(predOps(ARMCC::AL));
2368  } else {
2369  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
2370  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2371  .add(predOps(ARMCC::AL))
2372  .add(condCodeOp());
2373  }
2374 
2375  // push {lr} - Save return address of this function.
2376  if (Thumb) {
2377  BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
2378  .add(predOps(ARMCC::AL))
2379  .addReg(ARM::LR);
2380  } else {
2381  BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
2382  .addReg(ARM::SP, RegState::Define)
2383  .addReg(ARM::SP)
2384  .add(predOps(ARMCC::AL))
2385  .addReg(ARM::LR);
2386  }
2387 
2388  // Emit the DWARF info about the change in stack as well as where to find the
2389  // previous link register
2390  CFIIndex =
2391  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
2392  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2393  .addCFIIndex(CFIIndex);
2394  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2395  nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
2396  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2397  .addCFIIndex(CFIIndex);
2398 
2399  // Call __morestack().
2400  if (Thumb) {
2401  BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
2402  .add(predOps(ARMCC::AL))
2403  .addExternalSymbol("__morestack");
2404  } else {
2405  BuildMI(AllocMBB, DL, TII.get(ARM::BL))
2406  .addExternalSymbol("__morestack");
2407  }
2408 
2409  // pop {lr} - Restore return address of this original function.
2410  if (Thumb) {
2411  if (ST->isThumb1Only()) {
2412  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2413  .add(predOps(ARMCC::AL))
2414  .addReg(ScratchReg0);
2415  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
2416  .addReg(ScratchReg0)
2417  .add(predOps(ARMCC::AL));
2418  } else {
2419  BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
2420  .addReg(ARM::LR, RegState::Define)
2421  .addReg(ARM::SP, RegState::Define)
2422  .addReg(ARM::SP)
2423  .addImm(4)
2424  .add(predOps(ARMCC::AL));
2425  }
2426  } else {
2427  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2428  .addReg(ARM::SP, RegState::Define)
2429  .addReg(ARM::SP)
2430  .add(predOps(ARMCC::AL))
2431  .addReg(ARM::LR);
2432  }
2433 
2434  // Restore SR0 and SR1 in case __morestack() was called.
2435  // __morestack() will skip the PostStackMBB block, so we need to restore
2436  // the scratch registers here.
2437  // pop {SR0, SR1}
2438  if (Thumb) {
2439  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2440  .add(predOps(ARMCC::AL))
2441  .addReg(ScratchReg0)
2442  .addReg(ScratchReg1);
2443  } else {
2444  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2445  .addReg(ARM::SP, RegState::Define)
2446  .addReg(ARM::SP)
2447  .add(predOps(ARMCC::AL))
2448  .addReg(ScratchReg0)
2449  .addReg(ScratchReg1);
2450  }
2451 
2452  // Update the CFA offset now that we've popped
2453  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2454  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2455  .addCFIIndex(CFIIndex);
2456 
2457  // Return from this function.
2458  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
2459 
2460  // Restore SR0 and SR1 in case __morestack() was not called.
2461  // pop {SR0, SR1}
2462  if (Thumb) {
2463  BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
2464  .add(predOps(ARMCC::AL))
2465  .addReg(ScratchReg0)
2466  .addReg(ScratchReg1);
2467  } else {
2468  BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
2469  .addReg(ARM::SP, RegState::Define)
2470  .addReg(ARM::SP)
2471  .add(predOps(ARMCC::AL))
2472  .addReg(ScratchReg0)
2473  .addReg(ScratchReg1);
2474  }
2475 
2476  // Update the CFA offset now that we've popped
2477  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2478  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2479  .addCFIIndex(CFIIndex);
2480 
2481  // Tell debuggers that r4 and r5 are now the same as they were in the
2482  // previous function, that they're the "Same Value".
2483  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2484  nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
2485  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2486  .addCFIIndex(CFIIndex);
2487  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2488  nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
2489  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2490  .addCFIIndex(CFIIndex);
2491 
2492  // Organizing MBB lists
2493  PostStackMBB->addSuccessor(&PrologueMBB);
2494 
2495  AllocMBB->addSuccessor(PostStackMBB);
2496 
2497  GetMBB->addSuccessor(PostStackMBB);
2498  GetMBB->addSuccessor(AllocMBB);
2499 
2500  McrMBB->addSuccessor(GetMBB);
2501 
2502  PrevStackMBB->addSuccessor(McrMBB);
2503 
2504 #ifdef EXPENSIVE_CHECKS
2505  MF.verify();
2506 #endif
2507 }