ARMFrameLowering.cpp
1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the ARM implementation of TargetFrameLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMFrameLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMConstantPoolValue.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
22 #include "Utils/ARMBaseInfo.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/MC/MCContext.h"
46 #include "llvm/MC/MCDwarf.h"
47 #include "llvm/MC/MCInstrDesc.h"
48 #include "llvm/MC/MCRegisterInfo.h"
49 #include "llvm/Support/CodeGen.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstddef>
61 #include <cstdint>
62 #include <iterator>
63 #include <utility>
64 #include <vector>
65 
66 #define DEBUG_TYPE "arm-frame-lowering"
67 
68 using namespace llvm;
69 
70 static cl::opt<bool>
71 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
72  cl::desc("Align ARM NEON spills in prolog and epilog"));
73 
74 static MachineBasicBlock::iterator
75 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
76                         unsigned NumAlignedDPRCS2Regs);
77 
78 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
79     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
80  STI(sti) {}
81 
82 bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
83   // iOS always has a FP for backtracking, force other targets to keep their FP
84   // when doing FastISel. The emitted code is currently superior, and in cases
85   // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
86   return TargetFrameLowering::noFramePointerElim(MF) ||
87          MF.getSubtarget<ARMSubtarget>().useFastISel();
88 }
89 
90 /// Returns true if the target can safely skip saving callee-saved registers
91 /// for noreturn nounwind functions.
92 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
93   assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
94  MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
95  !MF.getFunction().hasFnAttribute(Attribute::UWTable));
96 
97   // The frame pointer and link register are not treated as normal CSRs, so we
98   // can always skip CSR saves for non-returning functions.
99  return true;
100 }
101 
102 /// hasFP - Return true if the specified function should have a dedicated frame
103 /// pointer register. This is true if the function has variable sized allocas
104 /// or if frame pointer elimination is disabled.
105 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
106   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
107  const MachineFrameInfo &MFI = MF.getFrameInfo();
108 
109   // ABI-required frame pointer.
110   if (MF.getTarget().Options.DisableFramePointerElim(MF))
111     return true;
112 
113  // Frame pointer required for use within this function.
114  return (RegInfo->needsStackRealignment(MF) ||
115  MFI.hasVarSizedObjects() ||
116  MFI.isFrameAddressTaken());
117 }
118 
119 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
120 /// not required, we reserve argument space for call sites in the function
121 /// immediately on entry to the current function. This eliminates the need for
122 /// add/sub sp brackets around call sites. Returns true if the call frame is
123 /// included as part of the stack frame.
124 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
125   const MachineFrameInfo &MFI = MF.getFrameInfo();
126  unsigned CFSize = MFI.getMaxCallFrameSize();
127  // It's not always a good idea to include the call frame as part of the
128   // stack frame. ARM (especially Thumb) has a small immediate offset range for
129   // addressing the stack frame, so a large call frame can cause poor codegen
130   // and may even make it impossible to scavenge a register.
131  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
132  return false;
133 
134  return !MFI.hasVarSizedObjects();
135 }
136 
137 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
138 /// call frame pseudos can be simplified. Unlike most targets, having a FP
139 /// is not sufficient here since we still may reference some objects via SP
140 /// even when FP is available in Thumb2 mode.
141 bool
142 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
143   return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
144 }
145 
146 static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
147                         const MCPhysReg *CSRegs) {
148  // Integer spill area is handled with "pop".
149  if (isPopOpcode(MI.getOpcode())) {
150  // The first two operands are predicates. The last two are
151  // imp-def and imp-use of SP. Check everything in between.
152  for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
153  if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
154  return false;
155  return true;
156  }
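  // Alternatively, a single callee-saved register may be restored with a
  // post-indexed load from SP (writeback form), which the check below matches.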
157  if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
158  MI.getOpcode() == ARM::LDR_POST_REG ||
159  MI.getOpcode() == ARM::t2LDR_POST) &&
160  isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
161  MI.getOperand(1).getReg() == ARM::SP)
162  return true;
163 
164  return false;
165 }
166 
167 static void emitRegPlusImmediate(
168     bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
169  const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
170  unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
171  ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
172  if (isARM)
173  emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
174  Pred, PredReg, TII, MIFlags);
175  else
176  emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
177  Pred, PredReg, TII, MIFlags);
178 }
179 
180 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
181  MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
182  const ARMBaseInstrInfo &TII, int NumBytes,
183  unsigned MIFlags = MachineInstr::NoFlags,
184                          ARMCC::CondCodes Pred = ARMCC::AL,
185                          unsigned PredReg = 0) {
186  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
187  MIFlags, Pred, PredReg);
188 }
189 
190 static int sizeOfSPAdjustment(const MachineInstr &MI) {
191  int RegSize;
192  switch (MI.getOpcode()) {
193  case ARM::VSTMDDB_UPD:
194  RegSize = 8;
195  break;
196  case ARM::STMDB_UPD:
197  case ARM::t2STMDB_UPD:
198  RegSize = 4;
199  break;
200  case ARM::t2STR_PRE:
201  case ARM::STR_PRE_IMM:
202  return 4;
203  default:
204  llvm_unreachable("Unknown push or pop like instruction");
205  }
206 
207  int count = 0;
208  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
209  // pred) so the list starts at 4.
210  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
211  count += RegSize;
212  return count;
213 }
214 
215 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
216                                       size_t StackSizeInBytes) {
217  const MachineFrameInfo &MFI = MF.getFrameInfo();
218  const Function &F = MF.getFunction();
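  // On Windows, allocations larger than roughly a page must call __chkstk so
  // that guard pages are touched in order; the threshold below decides that.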
219  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
220  if (F.hasFnAttribute("stack-probe-size"))
221     F.getFnAttribute("stack-probe-size")
222         .getValueAsString()
223         .getAsInteger(0, StackProbeSize);
224  return (StackSizeInBytes >= StackProbeSize) &&
225  !F.hasFnAttribute("no-stack-arg-probe");
226 }
227 
228 namespace {
229 
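// Records every prologue instruction that adjusts SP so that matching
// .cfi_def_cfa_offset directives can be emitted once the prologue is final.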
230 struct StackAdjustingInsts {
231  struct InstInfo {
232     MachineBasicBlock::iterator I;
233     unsigned SPAdjust;
234  bool BeforeFPSet;
235  };
236 
237   SmallVector<InstInfo, 4> Insts;
238 
239  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
240  bool BeforeFPSet = false) {
241  InstInfo Info = {I, SPAdjust, BeforeFPSet};
242  Insts.push_back(Info);
243  }
244 
245  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
246  auto Info =
247  llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
248  assert(Info != Insts.end() && "invalid sp adjusting instruction");
249  Info->SPAdjust += ExtraBytes;
250  }
251 
252  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
253  const ARMBaseInstrInfo &TII, bool HasFP) {
254  MachineFunction &MF = *MBB.getParent();
255  unsigned CFAOffset = 0;
256  for (auto &Info : Insts) {
257  if (HasFP && !Info.BeforeFPSet)
258  return;
259 
260  CFAOffset -= Info.SPAdjust;
261  unsigned CFIIndex = MF.addFrameInst(
262  MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
263  BuildMI(MBB, std::next(Info.I), dl,
264  TII.get(TargetOpcode::CFI_INSTRUCTION))
265           .addCFIIndex(CFIIndex)
266           .setMIFlags(MachineInstr::FrameSetup);
267     }
268  }
269 };
270 
271 } // end anonymous namespace
272 
273 /// Emit an instruction sequence that will align the address in
274 /// register Reg by zero-ing out the lower bits. For versions of the
275 /// architecture that support Neon, this must be done in a single
276 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
277 /// single instruction. That function only gets called when optimizing
278 /// spilling of D registers on a core with the Neon instruction set
279 /// present.
280 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
281                                      const TargetInstrInfo &TII,
282                                      MachineBasicBlock &MBB,
283                                      MachineBasicBlock::iterator MBBI,
284                                      const DebugLoc &DL, const unsigned Reg,
285  const unsigned Alignment,
286  const bool MustBeSingleInstruction) {
287  const ARMSubtarget &AST =
288  static_cast<const ARMSubtarget &>(MF.getSubtarget());
289  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
290  const unsigned AlignMask = Alignment - 1;
291  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
292  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
293  if (!AFI->isThumbFunction()) {
294  // if the BFC instruction is available, use that to zero the lower
295  // bits:
296  // bfc Reg, #0, log2(Alignment)
297  // otherwise use BIC, if the mask to zero the required number of bits
298  // can be encoded in the bic immediate field
299  // bic Reg, Reg, Alignment-1
300  // otherwise, emit
301  // lsr Reg, Reg, log2(Alignment)
302  // lsl Reg, Reg, log2(Alignment)
303  if (CanUseBFC) {
304  BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
305  .addReg(Reg, RegState::Kill)
306  .addImm(~AlignMask)
307  .add(predOps(ARMCC::AL));
308  } else if (AlignMask <= 255) {
309  BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
310  .addReg(Reg, RegState::Kill)
311           .addImm(AlignMask)
312           .add(predOps(ARMCC::AL))
313           .add(condCodeOp());
314  } else {
315  assert(!MustBeSingleInstruction &&
316  "Shouldn't call emitAligningInstructions demanding a single "
317  "instruction to be emitted for large stack alignment for a target "
318  "without BFC.");
319  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
320  .addReg(Reg, RegState::Kill)
321           .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
322           .add(predOps(ARMCC::AL))
323           .add(condCodeOp());
324  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
325  .addReg(Reg, RegState::Kill)
326           .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
327           .add(predOps(ARMCC::AL))
328           .add(condCodeOp());
329  }
330  } else {
331  // Since this is only reached for Thumb-2 targets, the BFC instruction
332  // should always be available.
333  assert(CanUseBFC);
334  BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
335  .addReg(Reg, RegState::Kill)
336  .addImm(~AlignMask)
337  .add(predOps(ARMCC::AL));
338  }
339 }
340 
341 /// We need the offset of the frame pointer relative to other MachineFrameInfo
342 /// offsets which are encoded relative to SP at function begin.
343 /// See also emitPrologue() for how the FP is set up.
344 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
345 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
346 /// this to produce a conservative estimate that we check in an assert() later.
347 static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
348  // This is a conservative estimation: Assume the frame pointer being r7 and
349  // pc("r15") up to r8 getting spilled before (= 8 registers).
350  return -AFI.getArgRegsSaveSize() - (8 * 4);
351 }
352 
353 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
354                                     MachineBasicBlock &MBB) const {
355  MachineBasicBlock::iterator MBBI = MBB.begin();
356  MachineFrameInfo &MFI = MF.getFrameInfo();
357   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
358   MachineModuleInfo &MMI = MF.getMMI();
359  MCContext &Context = MMI.getContext();
360  const TargetMachine &TM = MF.getTarget();
361  const MCRegisterInfo *MRI = Context.getRegisterInfo();
362  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
363  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
364  assert(!AFI->isThumb1OnlyFunction() &&
365  "This emitPrologue does not support Thumb1!");
366  bool isARM = !AFI->isThumbFunction();
367   unsigned Align = STI.getFrameLowering()->getStackAlignment();
368   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
369  unsigned NumBytes = MFI.getStackSize();
370  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
371 
372  // Debug location must be unknown since the first debug location is used
373  // to determine the end of the prologue.
374  DebugLoc dl;
375 
376  unsigned FramePtr = RegInfo->getFrameRegister(MF);
377 
378  // Determine the sizes of each callee-save spill areas and record which frame
379  // belongs to which callee-save spill areas.
380  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
381  int FramePtrSpillFI = 0;
382  int D8SpillFI = 0;
383 
384  // All calls are tail calls in GHC calling conv, and functions have no
385  // prologue/epilogue.
386   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
387     return;
388 
389  StackAdjustingInsts DefCFAOffsetCandidates;
390  bool HasFP = hasFP(MF);
391 
392  // Allocate the vararg register save area.
393  if (ArgRegsSaveSize) {
394     emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
395                  MachineInstr::FrameSetup);
396     DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
397  }
398 
399  if (!AFI->hasStackFrame() &&
400  (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
401  if (NumBytes - ArgRegsSaveSize != 0) {
402       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
403                    MachineInstr::FrameSetup);
404       DefCFAOffsetCandidates.addInst(std::prev(MBBI),
405  NumBytes - ArgRegsSaveSize, true);
406  }
407  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
408  return;
409  }
410 
411  // Determine spill area sizes.
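  // Area 1 holds r0-r7 and lr (plus r8-r12 when the push is not split), area 2
  // holds r8-r12 on targets that split the push/pop, and area 3 holds the
  // callee-saved D-registers.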
412  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
413  unsigned Reg = CSI[i].getReg();
414  int FI = CSI[i].getFrameIdx();
415  switch (Reg) {
416  case ARM::R8:
417  case ARM::R9:
418  case ARM::R10:
419  case ARM::R11:
420  case ARM::R12:
421  if (STI.splitFramePushPop(MF)) {
422  GPRCS2Size += 4;
423  break;
424  }
425       LLVM_FALLTHROUGH;
426     case ARM::R0:
427  case ARM::R1:
428  case ARM::R2:
429  case ARM::R3:
430  case ARM::R4:
431  case ARM::R5:
432  case ARM::R6:
433  case ARM::R7:
434  case ARM::LR:
435  if (Reg == FramePtr)
436  FramePtrSpillFI = FI;
437  GPRCS1Size += 4;
438  break;
439  default:
440  // This is a DPR. Exclude the aligned DPRCS2 spills.
441  if (Reg == ARM::D8)
442  D8SpillFI = FI;
443  if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
444  DPRCSSize += 8;
445  }
446  }
447 
448  // Move past area 1.
449  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
450  if (GPRCS1Size > 0) {
451  GPRCS1Push = LastPush = MBBI++;
452  DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
453  }
454 
455  // Determine starting offsets of spill areas.
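  // DPRGapSize is the padding (at most 4 bytes) needed so that the D-register
  // spill area stays 8-byte aligned below the GPR spill areas.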
456  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
457  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
458  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
459  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
460  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
461  int FramePtrOffsetInPush = 0;
462  if (HasFP) {
463  int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
464  assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset &&
465  "Max FP estimation is wrong");
466  FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
467  AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
468  NumBytes);
469  }
470  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
471  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
472  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
473 
474  // Move past area 2.
475  if (GPRCS2Size > 0) {
476  GPRCS2Push = LastPush = MBBI++;
477  DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
478  }
479 
480  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
481  // .cfi_offset operations will reflect that.
482  if (DPRGapSize) {
483  assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
484  if (LastPush != MBB.end() &&
485  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
486  DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
487  else {
488       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
489                    MachineInstr::FrameSetup);
490       DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
491  }
492  }
493 
494  // Move past area 3.
495  if (DPRCSSize > 0) {
496  // Since vpush register list cannot have gaps, there may be multiple vpush
497  // instructions in the prologue.
498  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
499  DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
500  LastPush = MBBI++;
501  }
502  }
503 
504  // Move past the aligned DPRCS2 area.
505  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
506     MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
507     // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
508  // leaves the stack pointer pointing to the DPRCS2 area.
509  //
510  // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
511  NumBytes += MFI.getObjectOffset(D8SpillFI);
512  } else
513  NumBytes = DPRCSOffset;
514 
515  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
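    // r4 receives the allocation size in 4-byte words; __chkstk converts it to
    // a byte count in r4, which is subtracted from sp below.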
516  uint32_t NumWords = NumBytes >> 2;
517 
518  if (NumWords < 65536)
519  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
520           .addImm(NumWords)
521           .setMIFlags(MachineInstr::FrameSetup)
522           .add(predOps(ARMCC::AL));
523  else
524  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
525           .addImm(NumWords)
526           .setMIFlags(MachineInstr::FrameSetup);
527 
528  switch (TM.getCodeModel()) {
529  case CodeModel::Small:
530  case CodeModel::Medium:
531  case CodeModel::Kernel:
532  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
533           .add(predOps(ARMCC::AL))
534           .addExternalSymbol("__chkstk")
535  .addReg(ARM::R4, RegState::Implicit)
536  .setMIFlags(MachineInstr::FrameSetup);
537  break;
538  case CodeModel::Large:
539  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
540           .addExternalSymbol("__chkstk")
541           .setMIFlags(MachineInstr::FrameSetup);
542 
543  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
544           .add(predOps(ARMCC::AL))
545           .addReg(ARM::R12, RegState::Kill)
546  .addReg(ARM::R4, RegState::Implicit)
547  .setMIFlags(MachineInstr::FrameSetup);
548  break;
549  }
550 
551  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
552         .addReg(ARM::SP, RegState::Kill)
553         .addReg(ARM::R4, RegState::Kill)
554         .setMIFlags(MachineInstr::FrameSetup)
555         .add(predOps(ARMCC::AL))
556         .add(condCodeOp());
557  NumBytes = 0;
558  }
559 
560  if (NumBytes) {
561  // Adjust SP after all the callee-save spills.
562  if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
563  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
564  DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
565  else {
566       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
567                    MachineInstr::FrameSetup);
568       DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
569  }
570 
571  if (HasFP && isARM)
572  // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
573  // Note it's not safe to do this in Thumb2 mode because it would have
574  // taken two instructions:
575  // mov sp, r7
576  // sub sp, #24
577  // If an interrupt is taken between the two instructions, then sp is in
578  // an inconsistent state (pointing to the middle of callee-saved area).
579  // The interrupt handler can end up clobbering the registers.
580  AFI->setShouldRestoreSPFromFP(true);
581  }
582 
583  // Set FP to point to the stack slot that contains the previous FP.
584  // For iOS, FP is R7, which has now been stored in spill area 1.
585  // Otherwise, if this is not iOS, all the callee-saved registers go
586  // into spill area 1, including the FP in R11. In either case, it
587  // is in area one and the adjustment needs to take place just after
588  // that push.
589  if (HasFP) {
590  MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
591  unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
592  emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
593  dl, TII, FramePtr, ARM::SP,
594                          PushSize + FramePtrOffsetInPush,
595                          MachineInstr::FrameSetup);
596     if (FramePtrOffsetInPush + PushSize != 0) {
597  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
598  nullptr, MRI->getDwarfRegNum(FramePtr, true),
599  -(ArgRegsSaveSize - FramePtrOffsetInPush)));
600  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
601           .addCFIIndex(CFIIndex)
602           .setMIFlags(MachineInstr::FrameSetup);
603     } else {
604  unsigned CFIIndex =
605           MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
606               nullptr, MRI->getDwarfRegNum(FramePtr, true)));
607  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
608           .addCFIIndex(CFIIndex)
609           .setMIFlags(MachineInstr::FrameSetup);
610     }
611  }
612 
613  // Now that the prologue's actual instructions are finalised, we can insert
614  // the necessary DWARF cf instructions to describe the situation. Start by
615  // recording where each register ended up:
616  if (GPRCS1Size > 0) {
617  MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
618  int CFIIndex;
619  for (const auto &Entry : CSI) {
620  unsigned Reg = Entry.getReg();
621  int FI = Entry.getFrameIdx();
622  switch (Reg) {
623  case ARM::R8:
624  case ARM::R9:
625  case ARM::R10:
626  case ARM::R11:
627  case ARM::R12:
628  if (STI.splitFramePushPop(MF))
629  break;
630         LLVM_FALLTHROUGH;
631       case ARM::R0:
632  case ARM::R1:
633  case ARM::R2:
634  case ARM::R3:
635  case ARM::R4:
636  case ARM::R5:
637  case ARM::R6:
638  case ARM::R7:
639  case ARM::LR:
640         CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
641             nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
642  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
643             .addCFIIndex(CFIIndex)
644             .setMIFlags(MachineInstr::FrameSetup);
645         break;
646  }
647  }
648  }
649 
650  if (GPRCS2Size > 0) {
651  MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
652  for (const auto &Entry : CSI) {
653  unsigned Reg = Entry.getReg();
654  int FI = Entry.getFrameIdx();
655  switch (Reg) {
656  case ARM::R8:
657  case ARM::R9:
658  case ARM::R10:
659  case ARM::R11:
660  case ARM::R12:
661  if (STI.splitFramePushPop(MF)) {
662  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
663  unsigned Offset = MFI.getObjectOffset(FI);
664  unsigned CFIIndex = MF.addFrameInst(
665  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
666  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
667               .addCFIIndex(CFIIndex)
668               .setMIFlags(MachineInstr::FrameSetup);
669         }
670  break;
671  }
672  }
673  }
674 
675  if (DPRCSSize > 0) {
676  // Since vpush register list cannot have gaps, there may be multiple vpush
677  // instructions in the prologue.
678  MachineBasicBlock::iterator Pos = std::next(LastPush);
679  for (const auto &Entry : CSI) {
680  unsigned Reg = Entry.getReg();
681  int FI = Entry.getFrameIdx();
682  if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
683  (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
684  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
685  unsigned Offset = MFI.getObjectOffset(FI);
686  unsigned CFIIndex = MF.addFrameInst(
687  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
688  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
689             .addCFIIndex(CFIIndex)
690             .setMIFlags(MachineInstr::FrameSetup);
691       }
692  }
693  }
694 
695  // Now we can emit descriptions of where the canonical frame address was
696  // throughout the process. If we have a frame pointer, it takes over the job
697  // half-way through, so only the first few .cfi_def_cfa_offset instructions
698  // actually get emitted.
699  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
700 
701  if (STI.isTargetELF() && hasFP(MF))
702     MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
703                             AFI->getFramePtrSpillOffset());
704 
705  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
706  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
707  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
708  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
709 
710  // If we need dynamic stack realignment, do it here. Be paranoid and make
711  // sure if we also have VLAs, we have a base pointer for frame access.
712  // If aligned NEON registers were spilled, the stack has already been
713  // realigned.
714  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
715  unsigned MaxAlign = MFI.getMaxAlignment();
716  assert(!AFI->isThumb1OnlyFunction());
717  if (!AFI->isThumbFunction()) {
718  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
719  false);
720  } else {
721  // We cannot use sp as source/dest register here, thus we're using r4 to
722  // perform the calculations. We're emitting the following sequence:
723  // mov r4, sp
724  // -- use emitAligningInstructions to produce best sequence to zero
725  // -- out lower bits in r4
726  // mov sp, r4
727  // FIXME: It will be better just to find spare register here.
728  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
729  .addReg(ARM::SP, RegState::Kill)
730  .add(predOps(ARMCC::AL));
731  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
732  false);
733  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
734  .addReg(ARM::R4, RegState::Kill)
735  .add(predOps(ARMCC::AL));
736  }
737 
738  AFI->setShouldRestoreSPFromFP(true);
739  }
740 
741  // If we need a base pointer, set it up here. It's whatever the value
742  // of the stack pointer is at this point. Any variable size objects
743  // will be allocated after this, so we can still use the base pointer
744  // to reference locals.
745  // FIXME: Clarify FrameSetup flags here.
746  if (RegInfo->hasBasePointer(MF)) {
747  if (isARM)
748  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
749           .addReg(ARM::SP)
750           .add(predOps(ARMCC::AL))
751           .add(condCodeOp());
752  else
753  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
754  .addReg(ARM::SP)
755  .add(predOps(ARMCC::AL));
756  }
757 
758  // If the frame has variable sized objects then the epilogue must restore
759  // the sp from fp. We can assume there's an FP here since hasFP already
760  // checks for hasVarSizedObjects.
761  if (MFI.hasVarSizedObjects())
762  AFI->setShouldRestoreSPFromFP(true);
763 }
764 
765 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
766                                     MachineBasicBlock &MBB) const {
767  MachineFrameInfo &MFI = MF.getFrameInfo();
768   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
769   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
770  const ARMBaseInstrInfo &TII =
771  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
772  assert(!AFI->isThumb1OnlyFunction() &&
773  "This emitEpilogue does not support Thumb1!");
774  bool isARM = !AFI->isThumbFunction();
775 
776  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
777  int NumBytes = (int)MFI.getStackSize();
778  unsigned FramePtr = RegInfo->getFrameRegister(MF);
779 
780  // All calls are tail calls in GHC calling conv, and functions have no
781  // prologue/epilogue.
782   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
783     return;
784 
785   // First put ourselves on the first (from top) terminator instruction.
786   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
787   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
788 
789  if (!AFI->hasStackFrame()) {
790  if (NumBytes - ArgRegsSaveSize != 0)
791  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
792  } else {
793  // Unwind MBBI to point to first LDR / VLDRD.
794  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
795  if (MBBI != MBB.begin()) {
796  do {
797  --MBBI;
798  } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
799  if (!isCSRestore(*MBBI, TII, CSRegs))
800  ++MBBI;
801  }
802 
803  // Move SP to start of FP callee save spill area.
804     NumBytes -= (ArgRegsSaveSize +
805                  AFI->getGPRCalleeSavedArea1Size() +
806                  AFI->getGPRCalleeSavedArea2Size() +
807                  AFI->getDPRCalleeSavedGapSize() +
808                  AFI->getDPRCalleeSavedAreaSize());
809 
810  // Reset SP based on frame pointer only if the stack frame extends beyond
811  // frame pointer stack slot or target is ELF and the function has FP.
812  if (AFI->shouldRestoreSPFromFP()) {
813  NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
814  if (NumBytes) {
815  if (isARM)
816  emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
817  ARMCC::AL, 0, TII);
818  else {
819  // It's not possible to restore SP from FP in a single instruction.
820  // For iOS, this looks like:
821  // mov sp, r7
822  // sub sp, #24
823         // This is bad: if an interrupt is taken after the mov, sp is in an
824  // inconsistent state.
825  // Use the first callee-saved register as a scratch register.
826  assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
827  "No scratch register to restore SP from FP!");
828  emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
829  ARMCC::AL, 0, TII);
830  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
831  .addReg(ARM::R4)
832  .add(predOps(ARMCC::AL));
833  }
834  } else {
835  // Thumb2 or ARM.
836  if (isARM)
837  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
838             .addReg(FramePtr)
839             .add(predOps(ARMCC::AL))
840             .add(condCodeOp());
841  else
842  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
843  .addReg(FramePtr)
844  .add(predOps(ARMCC::AL));
845  }
846  } else if (NumBytes &&
847  !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
848  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
849 
850  // Increment past our save areas.
851  if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
852  MBBI++;
853  // Since vpop register list cannot have gaps, there may be multiple vpop
854  // instructions in the epilogue.
855  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
856  MBBI++;
857  }
858  if (AFI->getDPRCalleeSavedGapSize()) {
859  assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
860  "unexpected DPR alignment gap");
861  emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
862  }
863 
864  if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
865  if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
866  }
867 
868  if (ArgRegsSaveSize)
869  emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
870 }
871 
872 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
873 /// debug info. It's the same as what we use for resolving the code-gen
874 /// references for now. FIXME: This can go wrong when references are
875 /// SP-relative and simple call frames aren't used.
876 int
877 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
878                                          unsigned &FrameReg) const {
879  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
880 }
881 
882 int
883 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
884                                              int FI, unsigned &FrameReg,
885  int SPAdj) const {
886  const MachineFrameInfo &MFI = MF.getFrameInfo();
887  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
888       MF.getSubtarget().getRegisterInfo());
889   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
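  // MFI object offsets are relative to the incoming SP; adding the stack size
  // rebases them onto the SP value after the frame has been set up.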
890  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
891  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
892  bool isFixed = MFI.isFixedObjectIndex(FI);
893 
894  FrameReg = ARM::SP;
895  Offset += SPAdj;
896 
897  // SP can move around if there are allocas. We may also lose track of SP
898  // when emergency spilling inside a non-reserved call frame setup.
899  bool hasMovingSP = !hasReservedCallFrame(MF);
900 
901  // When dynamically realigning the stack, use the frame pointer for
902  // parameters, and the stack/base pointer for locals.
903  if (RegInfo->needsStackRealignment(MF)) {
904  assert(hasFP(MF) && "dynamic stack realignment without a FP!");
905  if (isFixed) {
906  FrameReg = RegInfo->getFrameRegister(MF);
907  Offset = FPOffset;
908  } else if (hasMovingSP) {
909  assert(RegInfo->hasBasePointer(MF) &&
910  "VLAs and dynamic stack alignment, but missing base pointer!");
911  FrameReg = RegInfo->getBaseRegister();
912  }
913  return Offset;
914  }
915 
916  // If there is a frame pointer, use it when we can.
917  if (hasFP(MF) && AFI->hasStackFrame()) {
918  // Use frame pointer to reference fixed objects. Use it for locals if
919  // there are VLAs (and thus the SP isn't reliable as a base).
920  if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
921  FrameReg = RegInfo->getFrameRegister(MF);
922  return FPOffset;
923  } else if (hasMovingSP) {
924  assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
925  if (AFI->isThumb2Function()) {
926  // Try to use the frame pointer if we can, else use the base pointer
927  // since it's available. This is handy for the emergency spill slot, in
928  // particular.
929  if (FPOffset >= -255 && FPOffset < 0) {
930  FrameReg = RegInfo->getFrameRegister(MF);
931  return FPOffset;
932  }
933  }
934  } else if (AFI->isThumbFunction()) {
935  // Prefer SP to base pointer, if the offset is suitably aligned and in
936  // range as the effective range of the immediate offset is bigger when
937  // basing off SP.
938  // Use add <rd>, sp, #<imm8>
939  // ldr <rd>, [sp, #<imm8>]
940  if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
941  return Offset;
942  // In Thumb2 mode, the negative offset is very limited. Try to avoid
943  // out of range references. ldr <rt>,[<rn>, #-<imm8>]
944  if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
945  FrameReg = RegInfo->getFrameRegister(MF);
946  return FPOffset;
947  }
948  } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
949  // Otherwise, use SP or FP, whichever is closer to the stack slot.
950  FrameReg = RegInfo->getFrameRegister(MF);
951  return FPOffset;
952  }
953  }
954  // Use the base pointer if we have one.
955  if (RegInfo->hasBasePointer(MF))
956  FrameReg = RegInfo->getBaseRegister();
957  return Offset;
958 }
959 
960 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
961                                     MachineBasicBlock::iterator MI,
962                                     const std::vector<CalleeSavedInfo> &CSI,
963  unsigned StmOpc, unsigned StrOpc,
964  bool NoGap,
965  bool(*Func)(unsigned, bool),
966  unsigned NumAlignedDPRCS2Regs,
967  unsigned MIFlags) const {
968  MachineFunction &MF = *MBB.getParent();
969  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
971 
972  DebugLoc DL;
973 
974  using RegAndKill = std::pair<unsigned, bool>;
975 
977  unsigned i = CSI.size();
978  while (i != 0) {
979  unsigned LastReg = 0;
980  for (; i != 0; --i) {
981  unsigned Reg = CSI[i-1].getReg();
982  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
983 
984  // D-registers in the aligned area DPRCS2 are NOT spilled here.
985  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
986  continue;
987 
988  const MachineRegisterInfo &MRI = MF.getRegInfo();
989  bool isLiveIn = MRI.isLiveIn(Reg);
990  if (!isLiveIn && !MRI.isReserved(Reg))
991  MBB.addLiveIn(Reg);
992  // If NoGap is true, push consecutive registers and then leave the rest
993  // for other instructions. e.g.
994  // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
995  if (NoGap && LastReg && LastReg != Reg-1)
996  break;
997  LastReg = Reg;
998  // Do not set a kill flag on values that are also marked as live-in. This
999   // happens with the @llvm.returnaddress intrinsic and with arguments
1000  // passed in callee saved registers.
1001  // Omitting the kill flags is conservatively correct even if the live-in
1002  // is not used after all.
1003  Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1004  }
1005 
1006  if (Regs.empty())
1007  continue;
1008 
1009  llvm::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS,
1010  const RegAndKill &RHS) {
1011  return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1012  });
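    // Use a store-multiple when pushing more than one register (or when no
    // single-register opcode was given); a lone register uses the pre-indexed
    // store form instead.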
1013 
1014     if (Regs.size() > 1 || StrOpc == 0) {
1015  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1016  .addReg(ARM::SP)
1017  .setMIFlags(MIFlags)
1018  .add(predOps(ARMCC::AL));
1019  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1020  MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1021  } else if (Regs.size() == 1) {
1022  BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1023  .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1024  .addReg(ARM::SP)
1025  .setMIFlags(MIFlags)
1026  .addImm(-4)
1027  .add(predOps(ARMCC::AL));
1028  }
1029  Regs.clear();
1030 
1031  // Put any subsequent vpush instructions before this one: they will refer to
1032  // higher register numbers so need to be pushed first in order to preserve
1033  // monotonicity.
1034  if (MI != MBB.begin())
1035  --MI;
1036  }
1037 }
1038 
1039 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1040                                    MachineBasicBlock::iterator MI,
1041                                    std::vector<CalleeSavedInfo> &CSI,
1042  unsigned LdmOpc, unsigned LdrOpc,
1043  bool isVarArg, bool NoGap,
1044  bool(*Func)(unsigned, bool),
1045  unsigned NumAlignedDPRCS2Regs) const {
1046  MachineFunction &MF = *MBB.getParent();
1047  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1048   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1049   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1050   DebugLoc DL;
1051  bool isTailCall = false;
1052  bool isInterrupt = false;
1053  bool isTrap = false;
1054  if (MBB.end() != MI) {
1055  DL = MI->getDebugLoc();
1056  unsigned RetOpcode = MI->getOpcode();
1057  isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1058  isInterrupt =
1059  RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1060  isTrap =
1061  RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1062  RetOpcode == ARM::tTRAP;
1063  }
1064 
1066  unsigned i = CSI.size();
1067  while (i != 0) {
1068  unsigned LastReg = 0;
1069  bool DeleteRet = false;
1070  for (; i != 0; --i) {
1071  CalleeSavedInfo &Info = CSI[i-1];
1072  unsigned Reg = Info.getReg();
1073  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1074 
1075  // The aligned reloads from area DPRCS2 are not inserted here.
1076  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1077  continue;
1078 
1079  if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1080  !isTrap && STI.hasV5TOps()) {
1081  if (MBB.succ_empty()) {
1082  Reg = ARM::PC;
1083  // Fold the return instruction into the LDM.
1084  DeleteRet = true;
1085  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1086  // We 'restore' LR into PC so it is not live out of the return block:
1087  // Clear Restored bit.
1088  Info.setRestored(false);
1089  } else
1090  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1091  }
1092 
1093  // If NoGap is true, pop consecutive registers and then leave the rest
1094  // for other instructions. e.g.
1095  // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1096  if (NoGap && LastReg && LastReg != Reg-1)
1097  break;
1098 
1099  LastReg = Reg;
1100  Regs.push_back(Reg);
1101  }
1102 
1103  if (Regs.empty())
1104  continue;
1105 
1106  llvm::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) {
1107  return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1108  });
1109 
1110  if (Regs.size() > 1 || LdrOpc == 0) {
1111  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1112  .addReg(ARM::SP)
1113  .add(predOps(ARMCC::AL));
1114  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1115  MIB.addReg(Regs[i], getDefRegState(true));
1116  if (DeleteRet) {
1117  if (MI != MBB.end()) {
1118  MIB.copyImplicitOps(*MI);
1119  MI->eraseFromParent();
1120  }
1121  }
1122  MI = MIB;
1123  } else if (Regs.size() == 1) {
1124  // If we adjusted the reg to PC from LR above, switch it back here. We
1125  // only do that for LDM.
1126  if (Regs[0] == ARM::PC)
1127  Regs[0] = ARM::LR;
1128  MachineInstrBuilder MIB =
1129  BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1130  .addReg(ARM::SP, RegState::Define)
1131  .addReg(ARM::SP);
1132  // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1133  // that refactoring is complete (eventually).
1134  if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1135         MIB.addReg(0);
1136         MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1137       } else
1138  MIB.addImm(4);
1139  MIB.add(predOps(ARMCC::AL));
1140  }
1141  Regs.clear();
1142 
1143  // Put any subsequent vpop instructions after this one: they will refer to
1144  // higher register numbers so need to be popped afterwards.
1145  if (MI != MBB.end())
1146  ++MI;
1147  }
1148 }
1149 
1150 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1151 /// starting from d8. Also insert stack realignment code and leave the stack
1152 /// pointer pointing to the d8 spill slot.
1153 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1154                                     MachineBasicBlock::iterator MI,
1155                                     unsigned NumAlignedDPRCS2Regs,
1156  const std::vector<CalleeSavedInfo> &CSI,
1157  const TargetRegisterInfo *TRI) {
1158  MachineFunction &MF = *MBB.getParent();
1159   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1160   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1161  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1162  MachineFrameInfo &MFI = MF.getFrameInfo();
1163 
1164  // Mark the D-register spill slots as properly aligned. Since MFI computes
1165  // stack slot layout backwards, this can actually mean that the d-reg stack
1166  // slot offsets can be wrong. The offset for d8 will always be correct.
1167  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1168  unsigned DNum = CSI[i].getReg() - ARM::D8;
1169  if (DNum > NumAlignedDPRCS2Regs - 1)
1170  continue;
1171  int FI = CSI[i].getFrameIdx();
1172  // The even-numbered registers will be 16-byte aligned, the odd-numbered
1173  // registers will be 8-byte aligned.
1174  MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
1175 
1176  // The stack slot for D8 needs to be maximally aligned because this is
1177  // actually the point where we align the stack pointer. MachineFrameInfo
1178  // computes all offsets relative to the incoming stack pointer which is a
1179  // bit weird when realigning the stack. Any extra padding for this
1180  // over-alignment is not realized because the code inserted below adjusts
1181  // the stack pointer by numregs * 8 before aligning the stack pointer.
1182  if (DNum == 0)
1183  MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
1184  }
1185 
1186  // Move the stack pointer to the d8 spill slot, and align it at the same
1187  // time. Leave the stack slot address in the scratch register r4.
1188  //
1189  // sub r4, sp, #numregs * 8
1190  // bic r4, r4, #align - 1
1191  // mov sp, r4
1192  //
1193  bool isThumb = AFI->isThumbFunction();
1194  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1195  AFI->setShouldRestoreSPFromFP(true);
1196 
1197  // sub r4, sp, #numregs * 8
1198  // The immediate is <= 64, so it doesn't need any special encoding.
1199  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1200  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1201  .addReg(ARM::SP)
1202  .addImm(8 * NumAlignedDPRCS2Regs)
1203  .add(predOps(ARMCC::AL))
1204  .add(condCodeOp());
1205 
1206  unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
1207  // We must set parameter MustBeSingleInstruction to true, since
1208  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1209  // stack alignment. Luckily, this can always be done since all ARM
1210  // architecture versions that support Neon also support the BFC
1211  // instruction.
1212  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1213 
1214  // mov sp, r4
1215  // The stack pointer must be adjusted before spilling anything, otherwise
1216  // the stack slots could be clobbered by an interrupt handler.
1217  // Leave r4 live, it is used below.
1218  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1219  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1220  .addReg(ARM::R4)
1221  .add(predOps(ARMCC::AL));
1222  if (!isThumb)
1223  MIB.add(condCodeOp());
1224 
1225  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1226  // r4 holds the stack slot address.
1227  unsigned NextReg = ARM::D8;
1228 
1229  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1230  // The writeback is only needed when emitting two vst1.64 instructions.
1231  if (NumAlignedDPRCS2Regs >= 6) {
1232  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1233  &ARM::QQPRRegClass);
1234  MBB.addLiveIn(SupReg);
1235  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1236  .addReg(ARM::R4, RegState::Kill)
1237  .addImm(16)
1238  .addReg(NextReg)
1239  .addReg(SupReg, RegState::ImplicitKill)
1240  .add(predOps(ARMCC::AL));
1241  NextReg += 4;
1242  NumAlignedDPRCS2Regs -= 4;
1243  }
1244 
1245  // We won't modify r4 beyond this point. It currently points to the next
1246  // register to be spilled.
1247  unsigned R4BaseReg = NextReg;
1248 
1249  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1250  if (NumAlignedDPRCS2Regs >= 4) {
1251  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1252  &ARM::QQPRRegClass);
1253  MBB.addLiveIn(SupReg);
1254  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1255  .addReg(ARM::R4)
1256  .addImm(16)
1257  .addReg(NextReg)
1258  .addReg(SupReg, RegState::ImplicitKill)
1259  .add(predOps(ARMCC::AL));
1260  NextReg += 4;
1261  NumAlignedDPRCS2Regs -= 4;
1262  }
1263 
1264  // 16-byte aligned vst1.64 with 2 d-regs.
1265  if (NumAlignedDPRCS2Regs >= 2) {
1266  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1267  &ARM::QPRRegClass);
1268  MBB.addLiveIn(SupReg);
1269  BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1270  .addReg(ARM::R4)
1271  .addImm(16)
1272  .addReg(SupReg)
1273  .add(predOps(ARMCC::AL));
1274  NextReg += 2;
1275  NumAlignedDPRCS2Regs -= 2;
1276  }
1277 
1278  // Finally, use a vanilla vstr.64 for the odd last register.
1279  if (NumAlignedDPRCS2Regs) {
1280  MBB.addLiveIn(NextReg);
1281  // vstr.64 uses addrmode5 which has an offset scale of 4.
1282  BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1283  .addReg(NextReg)
1284  .addReg(ARM::R4)
1285  .addImm((NextReg - R4BaseReg) * 2)
1286  .add(predOps(ARMCC::AL));
1287  }
1288 
1289  // The last spill instruction inserted should kill the scratch register r4.
1290  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1291 }
1292 
1293 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1294 /// iterator to the following instruction.
1295 static MachineBasicBlock::iterator
1296 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1297                         unsigned NumAlignedDPRCS2Regs) {
1298  // sub r4, sp, #numregs * 8
1299  // bic r4, r4, #align - 1
1300  // mov sp, r4
1301  ++MI; ++MI; ++MI;
1302  assert(MI->mayStore() && "Expecting spill instruction");
1303 
1304  // These switches all fall through.
1305  switch(NumAlignedDPRCS2Regs) {
1306  case 7:
1307  ++MI;
1308  assert(MI->mayStore() && "Expecting spill instruction");
1309     LLVM_FALLTHROUGH;
1310   default:
1311  ++MI;
1312  assert(MI->mayStore() && "Expecting spill instruction");
1313     LLVM_FALLTHROUGH;
1314   case 1:
1315  case 2:
1316  case 4:
1317  assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1318  ++MI;
1319  }
1320  return MI;
1321 }
1322 
1323 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1324 /// starting from d8. These instructions are assumed to execute while the
1325 /// stack is still aligned, unlike the code inserted by emitPopInst.
1326 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1327                                       MachineBasicBlock::iterator MI,
1328                                       unsigned NumAlignedDPRCS2Regs,
1329  const std::vector<CalleeSavedInfo> &CSI,
1330  const TargetRegisterInfo *TRI) {
1331  MachineFunction &MF = *MBB.getParent();
1332   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1333   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1334  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1335 
1336  // Find the frame index assigned to d8.
1337  int D8SpillFI = 0;
1338  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1339  if (CSI[i].getReg() == ARM::D8) {
1340  D8SpillFI = CSI[i].getFrameIdx();
1341  break;
1342  }
1343 
1344  // Materialize the address of the d8 spill slot into the scratch register r4.
1345  // This can be fairly complicated if the stack frame is large, so just use
1346  // the normal frame index elimination mechanism to do it. This code runs as
1347  // the initial part of the epilog where the stack and base pointers haven't
1348  // been changed yet.
1349  bool isThumb = AFI->isThumbFunction();
1350  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1351 
1352  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1353  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1354  .addFrameIndex(D8SpillFI)
1355  .addImm(0)
1356  .add(predOps(ARMCC::AL))
1357  .add(condCodeOp());
1358 
1359  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1360  unsigned NextReg = ARM::D8;
1361 
1362  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1363  if (NumAlignedDPRCS2Regs >= 6) {
1364  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1365  &ARM::QQPRRegClass);
1366  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1367         .addReg(ARM::R4, RegState::Define)
1368         .addReg(ARM::R4)
1369         .addImm(16)
1370         .addReg(SupReg, RegState::ImplicitDefine)
1371         .add(predOps(ARMCC::AL));
1372  NextReg += 4;
1373  NumAlignedDPRCS2Regs -= 4;
1374  }
1375 
1376  // We won't modify r4 beyond this point. It currently points to the next
1377  // register to be spilled.
1378  unsigned R4BaseReg = NextReg;
1379 
1380  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1381  if (NumAlignedDPRCS2Regs >= 4) {
1382  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1383  &ARM::QQPRRegClass);
1384  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1385  .addReg(ARM::R4)
1386  .addImm(16)
1387         .addReg(SupReg, RegState::ImplicitDefine)
1388         .add(predOps(ARMCC::AL));
1389  NextReg += 4;
1390  NumAlignedDPRCS2Regs -= 4;
1391  }
1392 
1393  // 16-byte aligned vld1.64 with 2 d-regs.
1394  if (NumAlignedDPRCS2Regs >= 2) {
1395  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1396  &ARM::QPRRegClass);
1397  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1398  .addReg(ARM::R4)
1399  .addImm(16)
1400  .add(predOps(ARMCC::AL));
1401  NextReg += 2;
1402  NumAlignedDPRCS2Regs -= 2;
1403  }
1404 
1405  // Finally, use a vanilla vldr.64 for the remaining odd register.
1406  if (NumAlignedDPRCS2Regs)
1407  BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1408  .addReg(ARM::R4)
1409  .addImm(2 * (NextReg - R4BaseReg))
1410  .add(predOps(ARMCC::AL));
1411 
1412   // The last reload inserted above kills r4.
1413  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1414 }
1415 
1416 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
1417                                                  MachineBasicBlock::iterator MI,
1418                                                  const std::vector<CalleeSavedInfo> &CSI,
1419  const TargetRegisterInfo *TRI) const {
1420  if (CSI.empty())
1421  return false;
1422 
1423  MachineFunction &MF = *MBB.getParent();
1424   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1425 
1426  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1427  unsigned PushOneOpc = AFI->isThumbFunction() ?
1428  ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1429  unsigned FltOpc = ARM::VSTMDDB_UPD;
1430  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1431   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1432                MachineInstr::FrameSetup);
1433   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1434                MachineInstr::FrameSetup);
1435   emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1436  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1437 
1438  // The code above does not insert spill code for the aligned DPRCS2 registers.
1439  // The stack realignment code will be inserted between the push instructions
1440  // and these spills.
1441  if (NumAlignedDPRCS2Regs)
1442  emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1443 
1444  return true;
1445 }
1446 
1447 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1448                                                    MachineBasicBlock::iterator MI,
1449                                                    std::vector<CalleeSavedInfo> &CSI,
1450  const TargetRegisterInfo *TRI) const {
1451  if (CSI.empty())
1452  return false;
1453 
1454  MachineFunction &MF = *MBB.getParent();
1455   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1456   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1457  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1458 
1459  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1460  // registers. Do that here instead.
1461  if (NumAlignedDPRCS2Regs)
1462  emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1463 
1464  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1465  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1466  unsigned FltOpc = ARM::VLDMDIA_UPD;
1467  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1468  NumAlignedDPRCS2Regs);
1469  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1470  &isARMArea2Register, 0);
1471  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1472  &isARMArea1Register, 0);
1473 
1474  return true;
1475 }
1476 
1477 // FIXME: Make generic?
1478 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
1479  const ARMBaseInstrInfo &TII) {
1480  unsigned FnSize = 0;
1481  for (auto &MBB : MF) {
1482  for (auto &MI : MBB)
1483  FnSize += TII.getInstSizeInBytes(MI);
1484  }
1485  return FnSize;
1486 }
1487 
1488 /// estimateRSStackSizeLimit - Look at each instruction that references stack
1489 /// frames and return the stack size limit beyond which some of these
1490 /// instructions will require a scratch register during their expansion later.
1491 // FIXME: Move to TII?
1492 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
1493                                          const TargetFrameLowering *TFI) {
1494  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1495  unsigned Limit = (1 << 12) - 1;
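  // Start from the ARM-mode 12-bit load/store immediate range and tighten the
  // limit according to the addressing modes the function actually uses.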
1496  for (auto &MBB : MF) {
1497  for (auto &MI : MBB) {
1498  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1499  if (!MI.getOperand(i).isFI())
1500  continue;
1501 
1502  // When using ADDri to get the address of a stack object, 255 is the
1503  // largest offset guaranteed to fit in the immediate offset.
1504  if (MI.getOpcode() == ARM::ADDri) {
1505  Limit = std::min(Limit, (1U << 8) - 1);
1506  break;
1507  }
1508 
1509  // Otherwise check the addressing mode.
1510  switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
1511  case ARMII::AddrMode3:
1512  case ARMII::AddrModeT2_i8:
1513  Limit = std::min(Limit, (1U << 8) - 1);
1514  break;
1515  case ARMII::AddrMode5:
1516         case ARMII::AddrModeT2_i8s4:
1517           Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1518  break;
1519  case ARMII::AddrModeT2_i12:
1520  // i12 supports only positive offset so these will be converted to
1521  // i8 opcodes. See llvm::rewriteT2FrameIndex.
1522  if (TFI->hasFP(MF) && AFI->hasStackFrame())
1523  Limit = std::min(Limit, (1U << 8) - 1);
1524  break;
1525  case ARMII::AddrMode4:
1526  case ARMII::AddrMode6:
1527  // Addressing modes 4 & 6 (load/store) instructions can't encode an
1528  // immediate offset for stack references.
1529  return 0;
1530  default:
1531  break;
1532  }
1533  break; // At most one FI per instruction
1534  }
1535  }
1536  }
1537 
1538  return Limit;
1539 }
1540 
1541 // In functions that realign the stack, it can be an advantage to spill the
1542 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1543 // instructions take alignment hints that can improve performance.
1544 static void
1545 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
1546   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1547  if (!SpillAlignedNEONRegs)
1548  return;
1549 
1550  // Naked functions don't spill callee-saved registers.
1551  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
1552  return;
1553 
1554  // We are planning to use NEON instructions vst1 / vld1.
1555  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
1556  return;
1557 
1558  // Don't bother if the default stack alignment is sufficiently high.
1559  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
1560  return;
1561 
1562  // Aligned spills require stack realignment.
1563  if (!static_cast<const ARMBaseRegisterInfo *>(
1564  MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
1565  return;
1566 
1567  // We always spill contiguous d-registers starting from d8. Count how many
1568  // needs spilling. The register allocator will almost always use the
1569  // callee-saved registers in order, but it can happen that there are holes in
1570  // the range. Registers above the hole will be spilled to the standard DPRCS
1571  // area.
1572  unsigned NumSpills = 0;
1573  for (; NumSpills < 8; ++NumSpills)
1574  if (!SavedRegs.test(ARM::D8 + NumSpills))
1575  break;
1576 
1577  // Don't do this for just one d-register. It's not worth it.
1578  if (NumSpills < 2)
1579  return;
1580 
1581  // Spill the first NumSpills D-registers after realigning the stack.
1582  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1583 
1584  // A scratch register is required for the vst1 / vld1 instructions.
1585  SavedRegs.set(ARM::R4);
1586 }
1587 
1588 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
1589                                             BitVector &SavedRegs,
1590  RegScavenger *RS) const {
1591  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1592  // This tells PEI to spill the FP as if it is any other callee-save register
1593   // to take advantage of the eliminateFrameIndex machinery. This also ensures it
1594  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1595  // to combine multiple loads / stores.
1596  bool CanEliminateFrame = true;
1597  bool CS1Spilled = false;
1598  bool LRSpilled = false;
1599  unsigned NumGPRSpills = 0;
1600  unsigned NumFPRSpills = 0;
1601  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1602  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1603  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1604  MF.getSubtarget().getRegisterInfo());
1605  const ARMBaseInstrInfo &TII =
1606  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1607   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1608   MachineFrameInfo &MFI = MF.getFrameInfo();
1609   MachineRegisterInfo &MRI = MF.getRegInfo();
1610   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1611   (void)TRI;  // Silence unused warning in non-assert builds.
1612  unsigned FramePtr = RegInfo->getFrameRegister(MF);
1613 
1614  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
1615  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
1616  // since it's not always possible to restore sp from fp in a single
1617  // instruction.
1618  // FIXME: It will be better just to find spare register here.
1619  if (AFI->isThumb2Function() &&
1620  (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1621  SavedRegs.set(ARM::R4);
1622 
1623  // If a stack probe will be emitted, spill R4 and LR, since they are
1624  // clobbered by the stack probe call.
1625  // This estimate should be a safe, conservative estimate. The actual
1626  // stack probe is enabled based on the size of the local objects;
1627  // this estimate also includes the varargs store size.
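 // (The probe is a call to __chkstk, which expects its size argument in r4;
 // the call itself clobbers LR, hence both registers are spilled here.)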
1628  if (STI.isTargetWindows() &&
1629  WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
1630  SavedRegs.set(ARM::R4);
1631  SavedRegs.set(ARM::LR);
1632  }
1633 
1634  if (AFI->isThumb1OnlyFunction()) {
1635  // Spill LR if Thumb1 function uses variable length argument lists.
1636  if (AFI->getArgRegsSaveSize() > 0)
1637  SavedRegs.set(ARM::LR);
1638 
1639  // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
1640  // requires stack alignment. We don't know for sure what the stack size
1641  // will be, but for this, an estimate is good enough. If anything changes
1642  // it, it'll be a spill, which implies we've used all the registers
1643  // and so R4 is already used, so not marking it here will be OK.
1644  // FIXME: It will be better just to find spare register here.
1645  if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
1646  MFI.estimateStackSize(MF) > 508)
1647  SavedRegs.set(ARM::R4);
1648  }
1649 
1650  // See if we can spill vector registers to aligned stack.
1651  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
1652 
1653  // Spill the BasePtr if it's used.
1654  if (RegInfo->hasBasePointer(MF))
1655  SavedRegs.set(RegInfo->getBaseRegister());
1656 
1657  // Don't spill FP if the frame can be eliminated. This is determined
1658  // by scanning the callee-save registers to see if any is modified.
1659  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1660  for (unsigned i = 0; CSRegs[i]; ++i) {
1661  unsigned Reg = CSRegs[i];
1662  bool Spilled = false;
1663  if (SavedRegs.test(Reg)) {
1664  Spilled = true;
1665  CanEliminateFrame = false;
1666  }
1667 
1668  if (!ARM::GPRRegClass.contains(Reg)) {
1669  if (Spilled) {
1670  if (ARM::SPRRegClass.contains(Reg))
1671  NumFPRSpills++;
1672  else if (ARM::DPRRegClass.contains(Reg))
1673  NumFPRSpills += 2;
1674  else if (ARM::QPRRegClass.contains(Reg))
1675  NumFPRSpills += 4;
1676  }
1677  continue;
1678  }
1679 
1680  if (Spilled) {
1681  NumGPRSpills++;
1682 
1683  if (!STI.splitFramePushPop(MF)) {
1684  if (Reg == ARM::LR)
1685  LRSpilled = true;
1686  CS1Spilled = true;
1687  continue;
1688  }
1689 
1690  // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
1691  switch (Reg) {
1692  case ARM::LR:
1693  LRSpilled = true;
1694  LLVM_FALLTHROUGH;
1695  case ARM::R0: case ARM::R1:
1696  case ARM::R2: case ARM::R3:
1697  case ARM::R4: case ARM::R5:
1698  case ARM::R6: case ARM::R7:
1699  CS1Spilled = true;
1700  break;
1701  default:
1702  break;
1703  }
1704  } else {
1705  if (!STI.splitFramePushPop(MF)) {
1706  UnspilledCS1GPRs.push_back(Reg);
1707  continue;
1708  }
1709 
1710  switch (Reg) {
1711  case ARM::R0: case ARM::R1:
1712  case ARM::R2: case ARM::R3:
1713  case ARM::R4: case ARM::R5:
1714  case ARM::R6: case ARM::R7:
1715  case ARM::LR:
1716  UnspilledCS1GPRs.push_back(Reg);
1717  break;
1718  default:
1719  UnspilledCS2GPRs.push_back(Reg);
1720  break;
1721  }
1722  }
1723  }
1724 
1725  bool ForceLRSpill = false;
1726  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1727  unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
1728  // Force LR to be spilled if the Thumb function size is > 2048. This enables
1729  // use of BL to implement far jump. If it turns out that it's not needed
1730  // then the branch fix up path will undo it.
1731  if (FnSize >= (1 << 11)) {
1732  CanEliminateFrame = false;
1733  ForceLRSpill = true;
1734  }
1735  }
1736 
1737  // If any of the stack slot references may be out of range of an immediate
1738  // offset, make sure a register (or a spill slot) is available for the
1739  // register scavenger. Note that if we're indexing off the frame pointer, the
1740  // effective stack size is 4 bytes larger since the FP points to the stack
1741  // slot of the previous FP. Also, if we have variable sized objects in the
1742  // function, stack slot references will often be negative, and some of
1743  // our instructions are positive-offset only, so conservatively consider
1744  // that case to want a spill slot (or register) as well. Similarly, if
1745  // the function adjusts the stack pointer during execution and the
1746  // adjustments aren't already part of our stack size estimate, our offset
1747  // calculations may be off, so be conservative.
1748  // FIXME: We could add logic to be more precise about negative offsets
1749  // and which instructions will need a scratch register for them. Is it
1750  // worth the effort and added fragility?
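 // (For example, the load/store forms capped at 255 bytes in
 // estimateRSStackSizeLimit above cannot address a spill slot that ends up
 // a kilobyte away from SP or FP; the scavenged register is used to
 // materialize such an address instead.)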
1751  unsigned EstimatedStackSize =
1752  MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
1753 
1754  // Determine biggest (positive) SP offset in MachineFrameInfo.
1755  int MaxFixedOffset = 0;
1756  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
1757  int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
1758  MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
1759  }
1760 
1761  bool HasFP = hasFP(MF);
1762  if (HasFP) {
1763  if (AFI->hasStackFrame())
1764  EstimatedStackSize += 4;
1765  } else {
1766  // If FP is not used, SP will be used to access arguments, so count the
1767  // size of arguments into the estimation.
1768  EstimatedStackSize += MaxFixedOffset;
1769  }
1770  EstimatedStackSize += 16; // For possible paddings.
1771 
1772  unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
1773  int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
1774  bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
1775  MFI.hasVarSizedObjects() ||
1776  (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
1777  // For large argument stacks fp relative addressed may overflow.
1778  (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
1779  if (BigFrameOffsets ||
1780  !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1781  AFI->setHasStackFrame(true);
1782 
1783  if (HasFP) {
1784  SavedRegs.set(FramePtr);
1785  // If the frame pointer is required by the ABI, also spill LR so that we
1786  // emit a complete frame record.
1787  if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
1788  SavedRegs.set(ARM::LR);
1789  LRSpilled = true;
1790  NumGPRSpills++;
1791  auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
1792  if (LRPos != UnspilledCS1GPRs.end())
1793  UnspilledCS1GPRs.erase(LRPos);
1794  }
1795  auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
1796  if (FPPos != UnspilledCS1GPRs.end())
1797  UnspilledCS1GPRs.erase(FPPos);
1798  NumGPRSpills++;
1799  if (FramePtr == ARM::R7)
1800  CS1Spilled = true;
1801  }
1802 
1803  // This is true when we inserted a spill for an unused register that can now
1804  // be used for register scavenging.
1805  bool ExtraCSSpill = false;
1806 
1807  if (AFI->isThumb1OnlyFunction()) {
1808  // For Thumb1-only targets, we need some low registers when we save and
1809  // restore the high registers (which aren't allocatable, but could be
1810  // used by inline assembly) because the push/pop instructions can not
1811  // access high registers. If necessary, we might need to push more low
1812  // registers to ensure that there is at least one free that can be used
1813  // for the saving & restoring, and preferably we should ensure that as
1814  // many as are needed are available so that fewer push/pop instructions
1815  // are required.
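 //
 // For illustration (assuming r4 and r5 happen to be free low registers),
 // saving the high registers r8 and r9 is lowered to something like
 //
 //   mov  r4, r8
 //   mov  r5, r9
 //   push {r4, r5}
 //
 // with a matching pop-and-copy-back sequence in the epilogue.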
1816 
1817  // Low registers which are not currently pushed, but could be (r4-r7).
1818  SmallVector<unsigned, 4> AvailableRegs;
1819 
1820  // Unused argument registers (r0-r3) can be clobbered in the prologue for
1821  // free.
1822  int EntryRegDeficit = 0;
1823  for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
1824  if (!MF.getRegInfo().isLiveIn(Reg)) {
1825  --EntryRegDeficit;
1826  LLVM_DEBUG(dbgs()
1827  << printReg(Reg, TRI)
1828  << " is unused argument register, EntryRegDeficit = "
1829  << EntryRegDeficit << "\n");
1830  }
1831  }
1832 
1833  // Unused return registers can be clobbered in the epilogue for free.
1834  int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
1835  LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
1836  << " return regs used, ExitRegDeficit = "
1837  << ExitRegDeficit << "\n");
1838 
1839  int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
1840  LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
1841 
1842  // r4-r6 can be used in the prologue if they are pushed by the first push
1843  // instruction.
1844  for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
1845  if (SavedRegs.test(Reg)) {
1846  --RegDeficit;
1847  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1848  << " is saved low register, RegDeficit = "
1849  << RegDeficit << "\n");
1850  } else {
1851  AvailableRegs.push_back(Reg);
1852  LLVM_DEBUG(
1853  dbgs()
1854  << printReg(Reg, TRI)
1855  << " is non-saved low register, adding to AvailableRegs\n");
1856  }
1857  }
1858 
1859  // r7 can be used if it is not being used as the frame pointer.
1860  if (!HasFP) {
1861  if (SavedRegs.test(ARM::R7)) {
1862  --RegDeficit;
1863  LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
1864  << RegDeficit << "\n");
1865  } else {
1866  AvailableRegs.push_back(ARM::R7);
1867  LLVM_DEBUG(
1868  dbgs()
1869  << "%r7 is non-saved low register, adding to AvailableRegs\n");
1870  }
1871  }
1872 
1873  // Each of r8-r11 needs to be copied to a low register, then pushed.
1874  for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
1875  if (SavedRegs.test(Reg)) {
1876  ++RegDeficit;
1877  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1878  << " is saved high register, RegDeficit = "
1879  << RegDeficit << "\n");
1880  }
1881  }
1882 
1883  // LR can only be used by PUSH, not POP, and can't be used at all if the
1884  // llvm.returnaddress intrinsic is used. This is only worth doing if we
1885  // are more limited at function entry than exit.
1886  if ((EntryRegDeficit > ExitRegDeficit) &&
1887  !(MF.getRegInfo().isLiveIn(ARM::LR) &&
1888  MF.getFrameInfo().isReturnAddressTaken())) {
1889  if (SavedRegs.test(ARM::LR)) {
1890  --RegDeficit;
1891  LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
1892  << RegDeficit << "\n");
1893  } else {
1894  AvailableRegs.push_back(ARM::LR);
1895  LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
1896  }
1897  }
1898 
1899  // If there are more high registers that need pushing than low registers
1900  // available, push some more low registers so that we can use fewer push
1901  // instructions. This might not reduce RegDeficit all the way to zero,
1902  // because we can only guarantee that r4-r6 are available, but r8-r11 may
1903  // need saving.
1904  LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
1905  for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
1906  unsigned Reg = AvailableRegs.pop_back_val();
1907  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1908  << " to make up reg deficit\n");
1909  SavedRegs.set(Reg);
1910  NumGPRSpills++;
1911  CS1Spilled = true;
1912  assert(!MRI.isReserved(Reg) && "Should not be reserved");
1913  if (!MRI.isPhysRegUsed(Reg))
1914  ExtraCSSpill = true;
1915  UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
1916  if (Reg == ARM::LR)
1917  LRSpilled = true;
1918  }
1919  LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
1920  << "\n");
1921  }
1922 
1923  // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
1924  // spill LR as well so we can fold BX_RET into the register restore (LDM).
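 // (Folding here means emitting e.g. "pop {r4, r7, pc}" rather than
 // "pop {r4, r7}" followed by "bx lr".)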
1925  if (!LRSpilled && CS1Spilled) {
1926  SavedRegs.set(ARM::LR);
1927  NumGPRSpills++;
1928  SmallVectorImpl<unsigned>::iterator LRPos;
1929  LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
1930  if (LRPos != UnspilledCS1GPRs.end())
1931  UnspilledCS1GPRs.erase(LRPos);
1932 
1933  ForceLRSpill = false;
1934  if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR))
1935  ExtraCSSpill = true;
1936  }
1937 
1938  // If stack and double are 8-byte aligned and we are spilling an odd number
1939  // of GPRs, spill one extra callee save GPR so we won't have to pad between
1940  // the integer and double callee save areas.
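 // (For example, with an 8-byte aligned stack, pushing {r4, r5, r6, lr}
 // keeps the D-register spill area 8-byte aligned, while pushing only three
 // GPRs would leave it off by 4 bytes and force a padding word.)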
1941  LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
1942  unsigned TargetAlign = getStackAlignment();
1943  if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
1944  if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
1945  for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
1946  unsigned Reg = UnspilledCS1GPRs[i];
1947  // Don't spill a high register if the function is Thumb. In the case of
1948  // Windows on ARM, accept R11 (the frame pointer).
1949  if (!AFI->isThumbFunction() ||
1950  (STI.isTargetWindows() && Reg == ARM::R11) ||
1951  isARMLowRegister(Reg) || Reg == ARM::LR) {
1952  SavedRegs.set(Reg);
1953  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1954  << " to make up alignment\n");
1955  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1956  ExtraCSSpill = true;
1957  break;
1958  }
1959  }
1960  } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
1961  unsigned Reg = UnspilledCS2GPRs.front();
1962  SavedRegs.set(Reg);
1963  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1964  << " to make up alignment\n");
1965  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1966  ExtraCSSpill = true;
1967  }
1968  }
1969 
1970  // Estimate if we might need to scavenge a register at some point in order
1971  // to materialize a stack offset. If so, either spill one additional
1972  // callee-saved register or reserve a special spill slot to facilitate
1973  // register scavenging. Thumb1 needs a spill slot for stack pointer
1974  // adjustments also, even when the frame itself is small.
1975  if (BigFrameOffsets && !ExtraCSSpill) {
1976  // If any non-reserved CS register isn't spilled, just spill one or two
1977  // extra. That should take care of it!
1978  unsigned NumExtras = TargetAlign / 4;
1979  SmallVector<unsigned, 2> Extras;
1980  while (NumExtras && !UnspilledCS1GPRs.empty()) {
1981  unsigned Reg = UnspilledCS1GPRs.back();
1982  UnspilledCS1GPRs.pop_back();
1983  if (!MRI.isReserved(Reg) &&
1984  (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
1985  Reg == ARM::LR)) {
1986  Extras.push_back(Reg);
1987  NumExtras--;
1988  }
1989  }
1990  // For non-Thumb1 functions, also check for hi-reg CS registers
1991  if (!AFI->isThumb1OnlyFunction()) {
1992  while (NumExtras && !UnspilledCS2GPRs.empty()) {
1993  unsigned Reg = UnspilledCS2GPRs.back();
1994  UnspilledCS2GPRs.pop_back();
1995  if (!MRI.isReserved(Reg)) {
1996  Extras.push_back(Reg);
1997  NumExtras--;
1998  }
1999  }
2000  }
2001  if (NumExtras == 0) {
2002  for (unsigned Reg : Extras) {
2003  SavedRegs.set(Reg);
2004  if (!MRI.isPhysRegUsed(Reg))
2005  ExtraCSSpill = true;
2006  }
2007  }
2008  if (!ExtraCSSpill && !AFI->isThumb1OnlyFunction()) {
2009  // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
2010  // closest to SP or frame pointer.
2011  assert(RS && "Register scavenging not provided");
2012  const TargetRegisterClass &RC = ARM::GPRRegClass;
2013  unsigned Size = TRI->getSpillSize(RC);
2014  unsigned Align = TRI->getSpillAlignment(RC);
2015  RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2016  }
2017  }
2018  }
2019 
2020  if (ForceLRSpill) {
2021  SavedRegs.set(ARM::LR);
2022  AFI->setLRIsSpilledForFarJump(true);
2023  }
2024 }
2025 
2026 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2027  MachineFunction &MF, MachineBasicBlock &MBB,
2028  MachineBasicBlock::iterator I) const {
2029  const ARMBaseInstrInfo &TII =
2030  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2031  if (!hasReservedCallFrame(MF)) {
2032  // If we have alloca, convert as follows:
2033  // ADJCALLSTACKDOWN -> sub sp, sp, amount
2034  // ADJCALLSTACKUP -> add sp, sp, amount
2035  MachineInstr &Old = *I;
2036  DebugLoc dl = Old.getDebugLoc();
2037  unsigned Amount = TII.getFrameSize(Old);
2038  if (Amount != 0) {
2039  // We need to keep the stack aligned properly. To do this, we round the
2040  // amount of space needed for the outgoing arguments up to the next
2041  // alignment boundary.
2042  Amount = alignSPAdjust(Amount);
2043 
2044  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2045  assert(!AFI->isThumb1OnlyFunction() &&
2046  "This eliminateCallFramePseudoInstr does not support Thumb1!");
2047  bool isARM = !AFI->isThumbFunction();
2048 
2049  // Replace the pseudo instruction with a new instruction...
2050  unsigned Opc = Old.getOpcode();
2051  int PIdx = Old.findFirstPredOperandIdx();
2052  ARMCC::CondCodes Pred =
2053  (PIdx == -1) ? ARMCC::AL
2054  : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
2055  unsigned PredReg = TII.getFramePred(Old);
2056  if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2057  emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2058  Pred, PredReg);
2059  } else {
2060  assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2061  emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2062  Pred, PredReg);
2063  }
2064  }
2065  }
2066  return MBB.erase(I);
2067 }
2068 
2069 /// Get the minimum constant for ARM that is greater than or equal to the
2070 /// argument. In ARM, constants can have any value that can be produced by
2071 /// rotating an 8-bit value to the right by an even number of bits within a
2072 /// 32-bit word.
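/// A worked example (values chosen for illustration):
/// alignToARMConstant(0x1234) returns 0x1240, because 0x1234 itself is not
/// encodable but 0x1240 == 0x49 << 6 is: the 8-bit value 0x49 rotated right
/// by the even amount 26.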
2073 static uint32_t alignToARMConstant(uint32_t Value) {
2074  unsigned Shifted = 0;
2075 
2076  if (Value == 0)
2077  return 0;
2078 
2079  while (!(Value & 0xC0000000)) {
2080  Value = Value << 2;
2081  Shifted += 2;
2082  }
2083 
2084  bool Carry = (Value & 0x00FFFFFF);
2085  Value = ((Value & 0xFF000000) >> 24) + Carry;
2086 
2087  if (Value & 0x0000100)
2088  Value = Value & 0x000001FC;
2089 
2090  if (Shifted > 24)
2091  Value = Value >> (Shifted - 24);
2092  else
2093  Value = Value << (24 - Shifted);
2094 
2095  return Value;
2096 }
2097 
2098 // The stack limit in the TCB is set to this many bytes above the actual
2099 // stack limit.
2100 static const uint64_t kSplitStackAvailable = 256;
2101 
2102 // Adjust the function prologue to enable split stacks. This currently only
2103 // supports android and linux.
2104 //
2105 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2106 // must be well defined in order to allow for consistent implementations of the
2107 // __morestack helper function. The ABI is also not a normal ABI in that it
2108 // doesn't follow the normal calling conventions because this allows the
2109 // prologue of each function to be optimized further.
2110 //
2111 // Currently, the ABI looks like (when calling __morestack)
2112 //
2113 // * r4 holds the minimum stack size requested for this function call
2114 // * r5 holds the stack size of the arguments to the function
2115 // * the beginning of the function is 3 instructions after the call to
2116 // __morestack
2117 //
2118 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2119 // place the arguments on to the new stack, and the 3-instruction knowledge to
2120 // jump directly to the body of the function when working on the new stack.
2121 //
2122 // An old (and possibly no longer compatible) implementation of __morestack for
2123 // ARM can be found at [1].
2124 //
2125 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
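//
// As a rough sketch (ARM mode, not Thumb1; the label and the AlignedStackSize,
// AlignedArgSize and TlsOffset names are placeholders for values computed
// below), the code emitted by this function looks like:
//
//   push {r4, r5}                    ; save the two scratch registers
//   sub  r5, sp, #AlignedStackSize   ; or "mov r5, sp" for frames < 256 bytes
//   mrc  p15, #0, r4, c13, c0, #3    ; r4 <- TLS base
//   ldr  r4, [r4, #4 * TlsOffset]    ; r4 <- current stack limit
//   cmp  r4, r5
//   blo  .Lpost_stack                ; limit < sp - size: enough stack
//   mov  r4, #AlignedStackSize       ; arguments for __morestack
//   mov  r5, #AlignedArgSize
//   push {lr}
//   bl   __morestack
//   pop  {lr}
//   pop  {r4, r5}
//   bx   lr                          ; return
// .Lpost_stack:
//   pop  {r4, r5}                    ; fall through to the normal prologue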
2126 void ARMFrameLowering::adjustForSegmentedStacks(
2127  MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2128  unsigned Opcode;
2129  unsigned CFIIndex;
2130  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2131  bool Thumb = ST->isThumb();
2132 
2133  // Sadly, this currently doesn't support varargs or platforms other than
2134  // android/linux. Note that thumb1/thumb2 are supported for android/linux.
2135  if (MF.getFunction().isVarArg())
2136  report_fatal_error("Segmented stacks do not support vararg functions.");
2137  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2138  report_fatal_error("Segmented stacks not supported on this platform.");
2139 
2140  MachineFrameInfo &MFI = MF.getFrameInfo();
2141  MachineModuleInfo &MMI = MF.getMMI();
2142  MCContext &Context = MMI.getContext();
2143  const MCRegisterInfo *MRI = Context.getRegisterInfo();
2144  const ARMBaseInstrInfo &TII =
2145  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2146  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
2147  DebugLoc DL;
2148 
2149  uint64_t StackSize = MFI.getStackSize();
2150 
2151  // Do not generate a prologue for functions with a stack of size zero
2152  if (StackSize == 0)
2153  return;
2154 
2155  // Use R4 and R5 as scratch registers.
2156  // We save R4 and R5 before use and restore them before leaving the function.
2157  unsigned ScratchReg0 = ARM::R4;
2158  unsigned ScratchReg1 = ARM::R5;
2159  uint64_t AlignedStackSize;
2160 
2161  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2162  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2163  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
2164  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
2165  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
2166 
2167  // Grab everything that reaches PrologueMBB to update their liveness as well.
2168  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2169  SmallVector<MachineBasicBlock *, 2> WalkList;
2170  WalkList.push_back(&PrologueMBB);
2171 
2172  do {
2173  MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2174  for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2175  if (BeforePrologueRegion.insert(PredBB).second)
2176  WalkList.push_back(PredBB);
2177  }
2178  } while (!WalkList.empty());
2179 
2180  // The order in that list is important.
2181  // The blocks will all be inserted before PrologueMBB using that order.
2182  // Therefore the block that should appear first in the CFG should appear
2183  // first in the list.
2184  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2185  PostStackMBB};
2186 
2187  for (MachineBasicBlock *B : AddedBlocks)
2188  BeforePrologueRegion.insert(B);
2189 
2190  for (const auto &LI : PrologueMBB.liveins()) {
2191  for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2192  PredBB->addLiveIn(LI);
2193  }
2194 
2195  // Remove the newly added blocks from the list, since we know
2196  // we do not have to do the following updates for them.
2197  for (MachineBasicBlock *B : AddedBlocks) {
2198  BeforePrologueRegion.erase(B);
2199  MF.insert(PrologueMBB.getIterator(), B);
2200  }
2201 
2202  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2203  // Make sure the LiveIns are still sorted and unique.
2204  MBB->sortUniqueLiveIns();
2205  // Replace the edges to PrologueMBB by edges to the sequences
2206  // we are about to add.
2207  MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2208  }
2209 
2210  // The required stack size, rounded up to a constant that the ARM immediate
2210  // encoding can represent.
2211  AlignedStackSize = alignToARMConstant(StackSize);
2212 
2213  // When the frame size is less than 256 we just compare the stack
2214  // boundary directly to the value of the stack pointer, per gcc.
2215  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
2216 
2217  // We will use two of the callee save registers as scratch registers so we
2218  // need to save those registers onto the stack.
2219  // We will use SR0 to hold stack limit and SR1 to hold the stack size
2220  // requested and arguments for __morestack().
2221  // SR0: Scratch Register #0
2222  // SR1: Scratch Register #1
2223  // push {SR0, SR1}
2224  if (Thumb) {
2225  BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
2226  .add(predOps(ARMCC::AL))
2227  .addReg(ScratchReg0)
2228  .addReg(ScratchReg1);
2229  } else {
2230  BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
2231  .addReg(ARM::SP, RegState::Define)
2232  .addReg(ARM::SP)
2233  .add(predOps(ARMCC::AL))
2234  .addReg(ScratchReg0)
2235  .addReg(ScratchReg1);
2236  }
2237 
2238  // Emit the relevant DWARF information about the change in stack pointer as
2239  // well as where to find both r4 and r5 (the callee-save registers)
2240  CFIIndex =
2241  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
2242  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2243  .addCFIIndex(CFIIndex);
2244  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2245  nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
2246  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2247  .addCFIIndex(CFIIndex);
2248  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2249  nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
2250  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2251  .addCFIIndex(CFIIndex);
2252 
2253  // mov SR1, sp
2254  if (Thumb) {
2255  BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
2256  .addReg(ARM::SP)
2257  .add(predOps(ARMCC::AL));
2258  } else if (CompareStackPointer) {
2259  BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
2260  .addReg(ARM::SP)
2261  .add(predOps(ARMCC::AL))
2262  .add(condCodeOp());
2263  }
2264 
2265  // sub SR1, sp, #StackSize
2266  if (!CompareStackPointer && Thumb) {
2267  BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
2268  .add(condCodeOp())
2269  .addReg(ScratchReg1)
2270  .addImm(AlignedStackSize)
2271  .add(predOps(ARMCC::AL));
2272  } else if (!CompareStackPointer) {
2273  BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
2274  .addReg(ARM::SP)
2275  .addImm(AlignedStackSize)
2276  .add(predOps(ARMCC::AL))
2277  .add(condCodeOp());
2278  }
2279 
2280  if (Thumb && ST->isThumb1Only()) {
2281  unsigned PCLabelId = ARMFI->createPICLabelUId();
2282  ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
2283  MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
2284  MachineConstantPool *MCP = MF.getConstantPool();
2285  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
2286 
2287  // ldr SR0, [pc, offset(STACK_LIMIT)]
2288  BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
2289  .addConstantPoolIndex(CPI)
2290  .add(predOps(ARMCC::AL));
2291 
2292  // ldr SR0, [SR0]
2293  BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
2294  .addReg(ScratchReg0)
2295  .addImm(0)
2296  .add(predOps(ARMCC::AL));
2297  } else {
2298  // Get TLS base address from the coprocessor
2299  // mrc p15, #0, SR0, c13, c0, #3
2300  BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
2301  .addImm(15)
2302  .addImm(0)
2303  .addImm(13)
2304  .addImm(0)
2305  .addImm(3)
2306  .add(predOps(ARMCC::AL));
2307 
2308  // Use the last TLS slot on android and a private field of the TCB on linux.
2309  assert(ST->isTargetAndroid() || ST->isTargetLinux());
2310  unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
2311 
2312  // Get the stack limit from the right offset
2313  // ldr SR0, [sr0, #4 * TlsOffset]
2314  BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
2315  .addReg(ScratchReg0)
2316  .addImm(4 * TlsOffset)
2317  .add(predOps(ARMCC::AL));
2318  }
2319 
2320  // Compare stack limit with stack size requested.
2321  // cmp SR0, SR1
2322  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
2323  BuildMI(GetMBB, DL, TII.get(Opcode))
2324  .addReg(ScratchReg0)
2325  .addReg(ScratchReg1)
2326  .add(predOps(ARMCC::AL));
2327 
2328  // This jump is taken if StackLimit < SP - stack required.
2329  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
2330  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
2331  .addImm(ARMCC::LO)
2332  .addReg(ARM::CPSR);
2333 
2334 
2335  // Calling __morestack(StackSize, Size of stack arguments).
2336  // __morestack knows that the stack size requested is in SR0(r4)
2337  // and that the size of the stack arguments is in SR1(r5).
2338 
2339  // Pass the first argument to __morestack in Scratch Register #0:
2340  // the amount of stack required.
2341  if (Thumb) {
2342  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
2343  .add(condCodeOp())
2344  .addImm(AlignedStackSize)
2345  .add(predOps(ARMCC::AL));
2346  } else {
2347  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
2348  .addImm(AlignedStackSize)
2349  .add(predOps(ARMCC::AL))
2350  .add(condCodeOp());
2351  }
2352  // Pass the second argument to __morestack in Scratch Register #1:
2353  // the amount of stack used to store the function arguments.
2354  if (Thumb) {
2355  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
2356  .add(condCodeOp())
2357  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2358  .add(predOps(ARMCC::AL));
2359  } else {
2360  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
2361  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2362  .add(predOps(ARMCC::AL))
2363  .add(condCodeOp());
2364  }
2365 
2366  // push {lr} - Save return address of this function.
2367  if (Thumb) {
2368  BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
2369  .add(predOps(ARMCC::AL))
2370  .addReg(ARM::LR);
2371  } else {
2372  BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
2373  .addReg(ARM::SP, RegState::Define)
2374  .addReg(ARM::SP)
2375  .add(predOps(ARMCC::AL))
2376  .addReg(ARM::LR);
2377  }
2378 
2379  // Emit the DWARF info about the change in stack as well as where to find the
2380  // previous link register
2381  CFIIndex =
2382  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
2383  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2384  .addCFIIndex(CFIIndex);
2385  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2386  nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
2387  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2388  .addCFIIndex(CFIIndex);
2389 
2390  // Call __morestack().
2391  if (Thumb) {
2392  BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
2393  .add(predOps(ARMCC::AL))
2394  .addExternalSymbol("__morestack");
2395  } else {
2396  BuildMI(AllocMBB, DL, TII.get(ARM::BL))
2397  .addExternalSymbol("__morestack");
2398  }
2399 
2400  // pop {lr} - Restore return address of this original function.
2401  if (Thumb) {
2402  if (ST->isThumb1Only()) {
2403  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2404  .add(predOps(ARMCC::AL))
2405  .addReg(ScratchReg0);
2406  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
2407  .addReg(ScratchReg0)
2408  .add(predOps(ARMCC::AL));
2409  } else {
2410  BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
2411  .addReg(ARM::LR, RegState::Define)
2412  .addReg(ARM::SP, RegState::Define)
2413  .addReg(ARM::SP)
2414  .addImm(4)
2415  .add(predOps(ARMCC::AL));
2416  }
2417  } else {
2418  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2419  .addReg(ARM::SP, RegState::Define)
2420  .addReg(ARM::SP)
2421  .add(predOps(ARMCC::AL))
2422  .addReg(ARM::LR);
2423  }
2424 
2425  // Restore SR0 and SR1 in case __morestack() was called.
2426  // __morestack() will skip PostStackMBB block so we need to restore
2427  // scratch registers from here.
2428  // pop {SR0, SR1}
2429  if (Thumb) {
2430  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2431  .add(predOps(ARMCC::AL))
2432  .addReg(ScratchReg0)
2433  .addReg(ScratchReg1);
2434  } else {
2435  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2436  .addReg(ARM::SP, RegState::Define)
2437  .addReg(ARM::SP)
2438  .add(predOps(ARMCC::AL))
2439  .addReg(ScratchReg0)
2440  .addReg(ScratchReg1);
2441  }
2442 
2443  // Update the CFA offset now that we've popped
2444  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2445  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2446  .addCFIIndex(CFIIndex);
2447 
2448  // Return from this function.
2449  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
2450 
2451  // Restore SR0 and SR1 in case __morestack() was not called.
2452  // pop {SR0, SR1}
2453  if (Thumb) {
2454  BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
2455  .add(predOps(ARMCC::AL))
2456  .addReg(ScratchReg0)
2457  .addReg(ScratchReg1);
2458  } else {
2459  BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
2460  .addReg(ARM::SP, RegState::Define)
2461  .addReg(ARM::SP)
2462  .add(predOps(ARMCC::AL))
2463  .addReg(ScratchReg0)
2464  .addReg(ScratchReg1);
2465  }
2466 
2467  // Update the CFA offset now that we've popped
2468  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2469  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2470  .addCFIIndex(CFIIndex);
2471 
2472  // Tell debuggers that r4 and r5 are now the same as they were in the
2473  // previous function, that they're the "Same Value".
2474  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2475  nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
2476  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2477  .addCFIIndex(CFIIndex);
2478  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2479  nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
2480  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2481  .addCFIIndex(CFIIndex);
2482 
2483  // Organizing MBB lists
2484  PostStackMBB->addSuccessor(&PrologueMBB);
2485 
2486  AllocMBB->addSuccessor(PostStackMBB);
2487 
2488  GetMBB->addSuccessor(PostStackMBB);
2489  GetMBB->addSuccessor(AllocMBB);
2490 
2491  McrMBB->addSuccessor(GetMBB);
2492 
2493  PrevStackMBB->addSuccessor(McrMBB);
2494 
2495 #ifdef EXPENSIVE_CHECKS
2496  MF.verify();
2497 #endif
2498 }