// Doxygen source-listing export — LLVM 10.0.0svn, ARMFrameLowering.cpp.
// (The three lines here were HTML navigation text, not original source.)
1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the ARM implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMFrameLowering.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMConstantPoolValue.h"
17 #include "ARMMachineFunctionInfo.h"
18 #include "ARMSubtarget.h"
21 #include "Utils/ARMBaseInfo.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/MC/MCContext.h"
46 #include "llvm/MC/MCDwarf.h"
47 #include "llvm/MC/MCInstrDesc.h"
48 #include "llvm/MC/MCRegisterInfo.h"
49 #include "llvm/Support/CodeGen.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstddef>
61 #include <cstdint>
62 #include <iterator>
63 #include <utility>
64 #include <vector>
65 
66 #define DEBUG_TYPE "arm-frame-lowering"
67 
68 using namespace llvm;
69 
// Hidden command-line flag, enabled by default: controls whether ARM NEON
// (D-register) spills emitted in the prologue/epilogue are kept aligned.
70 static cl::opt<bool>
71 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
72  cl::desc("Align ARM NEON spills in prolog and epilog"));
73 
76  unsigned NumAlignedDPRCS2Regs);
77 
79  : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
80  STI(sti) {}
81 
83  // iOS always has a FP for backtracking, force other targets to keep their FP
84  // when doing FastISel. The emitted code is currently superior, and in cases
85  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
86  return MF.getSubtarget<ARMSubtarget>().useFastISel();
87 }
88 
89 /// Returns true if the target can safely skip saving callee-saved registers
90 /// for noreturn nounwind functions.
92  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
93  MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
94  !MF.getFunction().hasFnAttribute(Attribute::UWTable));
95 
96  // Frame pointer and link register are not treated as normal CSR, thus we
97  // can always skip CSR saves for nonreturning functions.
98  return true;
99 }
100 
101 /// hasFP - Return true if the specified function should have a dedicated frame
102 /// pointer register. This is true if the function has variable sized allocas
103 /// or if frame pointer elimination is disabled.
105  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
106  const MachineFrameInfo &MFI = MF.getFrameInfo();
107 
108  // ABI-required frame pointer.
110  return true;
111 
112  // Frame pointer required for use within this function.
113  return (RegInfo->needsStackRealignment(MF) ||
114  MFI.hasVarSizedObjects() ||
115  MFI.isFrameAddressTaken());
116 }
117 
118 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
119 /// not required, we reserve argument space for call sites in the function
120 /// immediately on entry to the current function. This eliminates the need for
121 /// add/sub sp brackets around call sites. Returns true if the call frame is
122 /// included as part of the stack frame.
124  const MachineFrameInfo &MFI = MF.getFrameInfo();
125  unsigned CFSize = MFI.getMaxCallFrameSize();
126  // It's not always a good idea to include the call frame as part of the
127  // stack frame. ARM (especially Thumb) has small immediate offset to
128  // address the stack frame. So a large call frame can cause poor codegen
129  // and may even makes it impossible to scavenge a register.
130  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
131  return false;
132 
133  return !MFI.hasVarSizedObjects();
134 }
135 
136 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
137 /// call frame pseudos can be simplified. Unlike most targets, having a FP
138 /// is not sufficient here since we still may reference some objects via SP
139 /// even when FP is available in Thumb2 mode.
140 bool
143 }
144 
146  const MCPhysReg *CSRegs) {
147  // Integer spill area is handled with "pop".
148  if (isPopOpcode(MI.getOpcode())) {
149  // The first two operands are predicates. The last two are
150  // imp-def and imp-use of SP. Check everything in between.
151  for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
152  if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
153  return false;
154  return true;
155  }
156  if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
157  MI.getOpcode() == ARM::LDR_POST_REG ||
158  MI.getOpcode() == ARM::t2LDR_POST) &&
159  isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
160  MI.getOperand(1).getReg() == ARM::SP)
161  return true;
162 
163  return false;
164 }
165 
167  bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
168  const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
169  unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
170  ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
171  if (isARM)
172  emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
173  Pred, PredReg, TII, MIFlags);
174  else
175  emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
176  Pred, PredReg, TII, MIFlags);
177 }
178 
179 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
180  MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
181  const ARMBaseInstrInfo &TII, int NumBytes,
182  unsigned MIFlags = MachineInstr::NoFlags,
184  unsigned PredReg = 0) {
185  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
186  MIFlags, Pred, PredReg);
187 }
188 
189 static int sizeOfSPAdjustment(const MachineInstr &MI) {
190  int RegSize;
191  switch (MI.getOpcode()) {
192  case ARM::VSTMDDB_UPD:
193  RegSize = 8;
194  break;
195  case ARM::STMDB_UPD:
196  case ARM::t2STMDB_UPD:
197  RegSize = 4;
198  break;
199  case ARM::t2STR_PRE:
200  case ARM::STR_PRE_IMM:
201  return 4;
202  default:
203  llvm_unreachable("Unknown push or pop like instruction");
204  }
205 
206  int count = 0;
207  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
208  // pred) so the list starts at 4.
209  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
210  count += RegSize;
211  return count;
212 }
213 
215  size_t StackSizeInBytes) {
216  const MachineFrameInfo &MFI = MF.getFrameInfo();
217  const Function &F = MF.getFunction();
218  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
219  if (F.hasFnAttribute("stack-probe-size"))
220  F.getFnAttribute("stack-probe-size")
222  .getAsInteger(0, StackProbeSize);
223  return (StackSizeInBytes >= StackProbeSize) &&
224  !F.hasFnAttribute("no-stack-arg-probe");
225 }
226 
227 namespace {
228 
// Bookkeeping for prologue instructions that adjust SP, so that matching
// .cfi_def_cfa_offset directives can be emitted once the prologue is final.
// NOTE(review): this doxygen export is missing two member-declaration lines
// (the InstInfo iterator field, original line 231, and the Insts container,
// original line 236) plus a .setMIFlags line at 265 — restore them from the
// upstream source before compiling.
229 struct StackAdjustingInsts {
230  struct InstInfo {
232  unsigned SPAdjust;
233  bool BeforeFPSet;
234  };
235 
237 
// Record one SP-adjusting instruction. BeforeFPSet marks adjustments made
// before the frame pointer is established.
238  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
239  bool BeforeFPSet = false) {
240  InstInfo Info = {I, SPAdjust, BeforeFPSet};
241  Insts.push_back(Info);
242  }
243 
// Fold extra bytes into the record for instruction I (used when a separate
// SP update is merged into an earlier push). Asserts I was recorded.
244  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
245  auto Info =
246  llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
247  assert(Info != Insts.end() && "invalid sp adjusting instruction");
248  Info->SPAdjust += ExtraBytes;
249  }
250 
// Emit a .cfi_def_cfa_offset after each recorded adjustment. Once a frame
// pointer takes over (HasFP), later SP moves no longer change the CFA, so
// emission stops at the first record not marked BeforeFPSet.
251  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
252  const ARMBaseInstrInfo &TII, bool HasFP) {
253  MachineFunction &MF = *MBB.getParent();
254  unsigned CFAOffset = 0;
255  for (auto &Info : Insts) {
256  if (HasFP && !Info.BeforeFPSet)
257  return;
258 
// CFAOffset is unsigned; the subtraction intentionally wraps, and the
// wrapped value is what createDefCfaOffset expects here.
259  CFAOffset -= Info.SPAdjust;
260  unsigned CFIIndex = MF.addFrameInst(
261  MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
262  BuildMI(MBB, std::next(Info.I), dl,
263  TII.get(TargetOpcode::CFI_INSTRUCTION))
264  .addCFIIndex(CFIIndex)
266  }
267  }
268 };
269 
270 } // end anonymous namespace
271 
272 /// Emit an instruction sequence that will align the address in
273 /// register Reg by zero-ing out the lower bits. For versions of the
274 /// architecture that support Neon, this must be done in a single
275 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
276 /// single instruction. That function only gets called when optimizing
277 /// spilling of D registers on a core with the Neon instruction set
278 /// present.
280  const TargetInstrInfo &TII,
281  MachineBasicBlock &MBB,
283  const DebugLoc &DL, const unsigned Reg,
284  const unsigned Alignment,
285  const bool MustBeSingleInstruction) {
286  const ARMSubtarget &AST =
287  static_cast<const ARMSubtarget &>(MF.getSubtarget());
288  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
289  const unsigned AlignMask = Alignment - 1;
290  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
291  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
292  if (!AFI->isThumbFunction()) {
293  // if the BFC instruction is available, use that to zero the lower
294  // bits:
295  // bfc Reg, #0, log2(Alignment)
296  // otherwise use BIC, if the mask to zero the required number of bits
297  // can be encoded in the bic immediate field
298  // bic Reg, Reg, Alignment-1
299  // otherwise, emit
300  // lsr Reg, Reg, log2(Alignment)
301  // lsl Reg, Reg, log2(Alignment)
302  if (CanUseBFC) {
303  BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
304  .addReg(Reg, RegState::Kill)
305  .addImm(~AlignMask)
306  .add(predOps(ARMCC::AL));
307  } else if (AlignMask <= 255) {
308  BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
309  .addReg(Reg, RegState::Kill)
310  .addImm(AlignMask)
312  .add(condCodeOp());
313  } else {
314  assert(!MustBeSingleInstruction &&
315  "Shouldn't call emitAligningInstructions demanding a single "
316  "instruction to be emitted for large stack alignment for a target "
317  "without BFC.");
318  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
319  .addReg(Reg, RegState::Kill)
320  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
322  .add(condCodeOp());
323  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
324  .addReg(Reg, RegState::Kill)
325  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
327  .add(condCodeOp());
328  }
329  } else {
330  // Since this is only reached for Thumb-2 targets, the BFC instruction
331  // should always be available.
332  assert(CanUseBFC);
333  BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
334  .addReg(Reg, RegState::Kill)
335  .addImm(~AlignMask)
336  .add(predOps(ARMCC::AL));
337  }
338 }
339 
340 /// We need the offset of the frame pointer relative to other MachineFrameInfo
341 /// offsets which are encoded relative to SP at function begin.
342 /// See also emitPrologue() for how the FP is set up.
343 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
344 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
345 /// this to produce a conservative estimate that we check in an assert() later.
346 static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
347  // For Thumb1, push.w isn't available, so the first push will always push
348  // r7 and lr onto the stack first.
349  if (AFI.isThumb1OnlyFunction())
350  return -AFI.getArgRegsSaveSize() - (2 * 4);
351  // This is a conservative estimation: Assume the frame pointer being r7 and
352  // pc("r15") up to r8 getting spilled before (= 8 registers).
353  return -AFI.getArgRegsSaveSize() - (8 * 4);
354 }
355 
357  MachineBasicBlock &MBB) const {
358  MachineBasicBlock::iterator MBBI = MBB.begin();
359  MachineFrameInfo &MFI = MF.getFrameInfo();
361  MachineModuleInfo &MMI = MF.getMMI();
362  MCContext &Context = MMI.getContext();
363  const TargetMachine &TM = MF.getTarget();
364  const MCRegisterInfo *MRI = Context.getRegisterInfo();
365  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
366  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
367  assert(!AFI->isThumb1OnlyFunction() &&
368  "This emitPrologue does not support Thumb1!");
369  bool isARM = !AFI->isThumbFunction();
371  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
372  unsigned NumBytes = MFI.getStackSize();
373  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
374 
375  // Debug location must be unknown since the first debug location is used
376  // to determine the end of the prologue.
377  DebugLoc dl;
378 
379  Register FramePtr = RegInfo->getFrameRegister(MF);
380 
381  // Determine the sizes of each callee-save spill areas and record which frame
382  // belongs to which callee-save spill areas.
383  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
384  int FramePtrSpillFI = 0;
385  int D8SpillFI = 0;
386 
387  // All calls are tail calls in GHC calling conv, and functions have no
388  // prologue/epilogue.
390  return;
391 
392  StackAdjustingInsts DefCFAOffsetCandidates;
393  bool HasFP = hasFP(MF);
394 
395  // Allocate the vararg register save area.
396  if (ArgRegsSaveSize) {
397  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
399  DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
400  }
401 
402  if (!AFI->hasStackFrame() &&
403  (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
404  if (NumBytes - ArgRegsSaveSize != 0) {
405  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
407  DefCFAOffsetCandidates.addInst(std::prev(MBBI),
408  NumBytes - ArgRegsSaveSize, true);
409  }
410  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
411  return;
412  }
413 
414  // Determine spill area sizes.
415  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
416  unsigned Reg = CSI[i].getReg();
417  int FI = CSI[i].getFrameIdx();
418  switch (Reg) {
419  case ARM::R8:
420  case ARM::R9:
421  case ARM::R10:
422  case ARM::R11:
423  case ARM::R12:
424  if (STI.splitFramePushPop(MF)) {
425  GPRCS2Size += 4;
426  break;
427  }
429  case ARM::R0:
430  case ARM::R1:
431  case ARM::R2:
432  case ARM::R3:
433  case ARM::R4:
434  case ARM::R5:
435  case ARM::R6:
436  case ARM::R7:
437  case ARM::LR:
438  if (Reg == FramePtr)
439  FramePtrSpillFI = FI;
440  GPRCS1Size += 4;
441  break;
442  default:
443  // This is a DPR. Exclude the aligned DPRCS2 spills.
444  if (Reg == ARM::D8)
445  D8SpillFI = FI;
446  if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
447  DPRCSSize += 8;
448  }
449  }
450 
451  // Move past area 1.
452  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
453  if (GPRCS1Size > 0) {
454  GPRCS1Push = LastPush = MBBI++;
455  DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
456  }
457 
458  // Determine starting offsets of spill areas.
459  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
460  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
461  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
462  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
463  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
464  int FramePtrOffsetInPush = 0;
465  if (HasFP) {
466  int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
467  assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset &&
468  "Max FP estimation is wrong");
469  FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
470  AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
471  NumBytes);
472  }
473  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
474  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
475  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
476 
477  // Move past area 2.
478  if (GPRCS2Size > 0) {
479  GPRCS2Push = LastPush = MBBI++;
480  DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
481  }
482 
483  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
484  // .cfi_offset operations will reflect that.
485  if (DPRGapSize) {
486  assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
487  if (LastPush != MBB.end() &&
488  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
489  DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
490  else {
491  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
493  DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
494  }
495  }
496 
497  // Move past area 3.
498  if (DPRCSSize > 0) {
499  // Since vpush register list cannot have gaps, there may be multiple vpush
500  // instructions in the prologue.
501  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
502  DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
503  LastPush = MBBI++;
504  }
505  }
506 
507  // Move past the aligned DPRCS2 area.
508  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
510  // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
511  // leaves the stack pointer pointing to the DPRCS2 area.
512  //
513  // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
514  NumBytes += MFI.getObjectOffset(D8SpillFI);
515  } else
516  NumBytes = DPRCSOffset;
517 
518  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
519  uint32_t NumWords = NumBytes >> 2;
520 
521  if (NumWords < 65536)
522  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
523  .addImm(NumWords)
525  .add(predOps(ARMCC::AL));
526  else
527  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
528  .addImm(NumWords)
530 
531  switch (TM.getCodeModel()) {
532  case CodeModel::Tiny:
533  llvm_unreachable("Tiny code model not available on ARM.");
534  case CodeModel::Small:
535  case CodeModel::Medium:
536  case CodeModel::Kernel:
537  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
539  .addExternalSymbol("__chkstk")
540  .addReg(ARM::R4, RegState::Implicit)
541  .setMIFlags(MachineInstr::FrameSetup);
542  break;
543  case CodeModel::Large:
544  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
545  .addExternalSymbol("__chkstk")
547 
548  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
550  .addReg(ARM::R12, RegState::Kill)
551  .addReg(ARM::R4, RegState::Implicit)
552  .setMIFlags(MachineInstr::FrameSetup);
553  break;
554  }
555 
556  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
557  .addReg(ARM::SP, RegState::Kill)
561  .add(condCodeOp());
562  NumBytes = 0;
563  }
564 
565  if (NumBytes) {
566  // Adjust SP after all the callee-save spills.
567  if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
568  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
569  DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
570  else {
571  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
573  DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
574  }
575 
576  if (HasFP && isARM)
577  // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
578  // Note it's not safe to do this in Thumb2 mode because it would have
579  // taken two instructions:
580  // mov sp, r7
581  // sub sp, #24
582  // If an interrupt is taken between the two instructions, then sp is in
583  // an inconsistent state (pointing to the middle of callee-saved area).
584  // The interrupt handler can end up clobbering the registers.
585  AFI->setShouldRestoreSPFromFP(true);
586  }
587 
588  // Set FP to point to the stack slot that contains the previous FP.
589  // For iOS, FP is R7, which has now been stored in spill area 1.
590  // Otherwise, if this is not iOS, all the callee-saved registers go
591  // into spill area 1, including the FP in R11. In either case, it
592  // is in area one and the adjustment needs to take place just after
593  // that push.
594  if (HasFP) {
595  MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
596  unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
597  emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
598  dl, TII, FramePtr, ARM::SP,
599  PushSize + FramePtrOffsetInPush,
601  if (FramePtrOffsetInPush + PushSize != 0) {
602  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
603  nullptr, MRI->getDwarfRegNum(FramePtr, true),
604  -(ArgRegsSaveSize - FramePtrOffsetInPush)));
605  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
606  .addCFIIndex(CFIIndex)
608  } else {
609  unsigned CFIIndex =
611  nullptr, MRI->getDwarfRegNum(FramePtr, true)));
612  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
613  .addCFIIndex(CFIIndex)
615  }
616  }
617 
618  // Now that the prologue's actual instructions are finalised, we can insert
619  // the necessary DWARF cf instructions to describe the situation. Start by
620  // recording where each register ended up:
621  if (GPRCS1Size > 0) {
622  MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
623  int CFIIndex;
624  for (const auto &Entry : CSI) {
625  unsigned Reg = Entry.getReg();
626  int FI = Entry.getFrameIdx();
627  switch (Reg) {
628  case ARM::R8:
629  case ARM::R9:
630  case ARM::R10:
631  case ARM::R11:
632  case ARM::R12:
633  if (STI.splitFramePushPop(MF))
634  break;
636  case ARM::R0:
637  case ARM::R1:
638  case ARM::R2:
639  case ARM::R3:
640  case ARM::R4:
641  case ARM::R5:
642  case ARM::R6:
643  case ARM::R7:
644  case ARM::LR:
646  nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
647  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
648  .addCFIIndex(CFIIndex)
650  break;
651  }
652  }
653  }
654 
655  if (GPRCS2Size > 0) {
656  MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
657  for (const auto &Entry : CSI) {
658  unsigned Reg = Entry.getReg();
659  int FI = Entry.getFrameIdx();
660  switch (Reg) {
661  case ARM::R8:
662  case ARM::R9:
663  case ARM::R10:
664  case ARM::R11:
665  case ARM::R12:
666  if (STI.splitFramePushPop(MF)) {
667  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
668  unsigned Offset = MFI.getObjectOffset(FI);
669  unsigned CFIIndex = MF.addFrameInst(
670  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
671  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
672  .addCFIIndex(CFIIndex)
674  }
675  break;
676  }
677  }
678  }
679 
680  if (DPRCSSize > 0) {
681  // Since vpush register list cannot have gaps, there may be multiple vpush
682  // instructions in the prologue.
683  MachineBasicBlock::iterator Pos = std::next(LastPush);
684  for (const auto &Entry : CSI) {
685  unsigned Reg = Entry.getReg();
686  int FI = Entry.getFrameIdx();
687  if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
688  (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
689  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
690  unsigned Offset = MFI.getObjectOffset(FI);
691  unsigned CFIIndex = MF.addFrameInst(
692  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
693  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
694  .addCFIIndex(CFIIndex)
696  }
697  }
698  }
699 
700  // Now we can emit descriptions of where the canonical frame address was
701  // throughout the process. If we have a frame pointer, it takes over the job
702  // half-way through, so only the first few .cfi_def_cfa_offset instructions
703  // actually get emitted.
704  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
705 
706  if (STI.isTargetELF() && hasFP(MF))
708  AFI->getFramePtrSpillOffset());
709 
710  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
711  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
712  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
713  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
714 
715  // If we need dynamic stack realignment, do it here. Be paranoid and make
716  // sure if we also have VLAs, we have a base pointer for frame access.
717  // If aligned NEON registers were spilled, the stack has already been
718  // realigned.
719  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
720  unsigned MaxAlign = MFI.getMaxAlignment();
721  assert(!AFI->isThumb1OnlyFunction());
722  if (!AFI->isThumbFunction()) {
723  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
724  false);
725  } else {
726  // We cannot use sp as source/dest register here, thus we're using r4 to
727  // perform the calculations. We're emitting the following sequence:
728  // mov r4, sp
729  // -- use emitAligningInstructions to produce best sequence to zero
730  // -- out lower bits in r4
731  // mov sp, r4
732  // FIXME: It will be better just to find spare register here.
733  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
734  .addReg(ARM::SP, RegState::Kill)
735  .add(predOps(ARMCC::AL));
736  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
737  false);
738  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
739  .addReg(ARM::R4, RegState::Kill)
740  .add(predOps(ARMCC::AL));
741  }
742 
743  AFI->setShouldRestoreSPFromFP(true);
744  }
745 
746  // If we need a base pointer, set it up here. It's whatever the value
747  // of the stack pointer is at this point. Any variable size objects
748  // will be allocated after this, so we can still use the base pointer
749  // to reference locals.
750  // FIXME: Clarify FrameSetup flags here.
751  if (RegInfo->hasBasePointer(MF)) {
752  if (isARM)
753  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
754  .addReg(ARM::SP)
756  .add(condCodeOp());
757  else
758  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
759  .addReg(ARM::SP)
760  .add(predOps(ARMCC::AL));
761  }
762 
763  // If the frame has variable sized objects then the epilogue must restore
764  // the sp from fp. We can assume there's an FP here since hasFP already
765  // checks for hasVarSizedObjects.
766  if (MFI.hasVarSizedObjects())
767  AFI->setShouldRestoreSPFromFP(true);
768 }
769 
771  MachineBasicBlock &MBB) const {
772  MachineFrameInfo &MFI = MF.getFrameInfo();
774  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
775  const ARMBaseInstrInfo &TII =
776  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
777  assert(!AFI->isThumb1OnlyFunction() &&
778  "This emitEpilogue does not support Thumb1!");
779  bool isARM = !AFI->isThumbFunction();
780 
781  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
782  int NumBytes = (int)MFI.getStackSize();
783  Register FramePtr = RegInfo->getFrameRegister(MF);
784 
785  // All calls are tail calls in GHC calling conv, and functions have no
786  // prologue/epilogue.
788  return;
789 
790  // First put ourselves on the first (from top) terminator instructions.
792  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
793 
794  if (!AFI->hasStackFrame()) {
795  if (NumBytes - ArgRegsSaveSize != 0)
796  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
797  } else {
798  // Unwind MBBI to point to first LDR / VLDRD.
799  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
800  if (MBBI != MBB.begin()) {
801  do {
802  --MBBI;
803  } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
804  if (!isCSRestore(*MBBI, TII, CSRegs))
805  ++MBBI;
806  }
807 
808  // Move SP to start of FP callee save spill area.
809  NumBytes -= (ArgRegsSaveSize +
812  AFI->getDPRCalleeSavedGapSize() +
814 
815  // Reset SP based on frame pointer only if the stack frame extends beyond
816  // frame pointer stack slot or target is ELF and the function has FP.
817  if (AFI->shouldRestoreSPFromFP()) {
818  NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
819  if (NumBytes) {
820  if (isARM)
821  emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
822  ARMCC::AL, 0, TII);
823  else {
824  // It's not possible to restore SP from FP in a single instruction.
825  // For iOS, this looks like:
826  // mov sp, r7
827  // sub sp, #24
828  // This is bad, if an interrupt is taken after the mov, sp is in an
829  // inconsistent state.
830  // Use the first callee-saved register as a scratch register.
831  assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
832  "No scratch register to restore SP from FP!");
833  emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
834  ARMCC::AL, 0, TII);
835  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
836  .addReg(ARM::R4)
837  .add(predOps(ARMCC::AL));
838  }
839  } else {
840  // Thumb2 or ARM.
841  if (isARM)
842  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
843  .addReg(FramePtr)
845  .add(condCodeOp());
846  else
847  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
848  .addReg(FramePtr)
849  .add(predOps(ARMCC::AL));
850  }
851  } else if (NumBytes &&
852  !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
853  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
854 
855  // Increment past our save areas.
856  if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
857  MBBI++;
858  // Since vpop register list cannot have gaps, there may be multiple vpop
859  // instructions in the epilogue.
860  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
861  MBBI++;
862  }
863  if (AFI->getDPRCalleeSavedGapSize()) {
864  assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
865  "unexpected DPR alignment gap");
866  emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
867  }
868 
869  if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
870  if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
871  }
872 
873  if (ArgRegsSaveSize)
874  emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
875 }
876 
877 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
878 /// debug info. It's the same as what we use for resolving the code-gen
879 /// references for now. FIXME: This can go wrong when references are
880 /// SP-relative and simple call frames aren't used.
881 int
883  unsigned &FrameReg) const {
884  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
885 }
886 
887 int
889  int FI, unsigned &FrameReg,
890  int SPAdj) const {
891  const MachineFrameInfo &MFI = MF.getFrameInfo();
892  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
894  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
895  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
896  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
897  bool isFixed = MFI.isFixedObjectIndex(FI);
898 
899  FrameReg = ARM::SP;
900  Offset += SPAdj;
901 
902  // SP can move around if there are allocas. We may also lose track of SP
903  // when emergency spilling inside a non-reserved call frame setup.
904  bool hasMovingSP = !hasReservedCallFrame(MF);
905 
906  // When dynamically realigning the stack, use the frame pointer for
907  // parameters, and the stack/base pointer for locals.
908  if (RegInfo->needsStackRealignment(MF)) {
909  assert(hasFP(MF) && "dynamic stack realignment without a FP!");
910  if (isFixed) {
911  FrameReg = RegInfo->getFrameRegister(MF);
912  Offset = FPOffset;
913  } else if (hasMovingSP) {
914  assert(RegInfo->hasBasePointer(MF) &&
915  "VLAs and dynamic stack alignment, but missing base pointer!");
916  FrameReg = RegInfo->getBaseRegister();
917  Offset -= SPAdj;
918  }
919  return Offset;
920  }
921 
922  // If there is a frame pointer, use it when we can.
923  if (hasFP(MF) && AFI->hasStackFrame()) {
924  // Use frame pointer to reference fixed objects. Use it for locals if
925  // there are VLAs (and thus the SP isn't reliable as a base).
926  if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
927  FrameReg = RegInfo->getFrameRegister(MF);
928  return FPOffset;
929  } else if (hasMovingSP) {
930  assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
931  if (AFI->isThumb2Function()) {
932  // Try to use the frame pointer if we can, else use the base pointer
933  // since it's available. This is handy for the emergency spill slot, in
934  // particular.
935  if (FPOffset >= -255 && FPOffset < 0) {
936  FrameReg = RegInfo->getFrameRegister(MF);
937  return FPOffset;
938  }
939  }
940  } else if (AFI->isThumbFunction()) {
941  // Prefer SP to base pointer, if the offset is suitably aligned and in
942  // range as the effective range of the immediate offset is bigger when
943  // basing off SP.
944  // Use add <rd>, sp, #<imm8>
945  // ldr <rd>, [sp, #<imm8>]
946  if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
947  return Offset;
948  // In Thumb2 mode, the negative offset is very limited. Try to avoid
949  // out of range references. ldr <rt>,[<rn>, #-<imm8>]
950  if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
951  FrameReg = RegInfo->getFrameRegister(MF);
952  return FPOffset;
953  }
954  } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
955  // Otherwise, use SP or FP, whichever is closer to the stack slot.
956  FrameReg = RegInfo->getFrameRegister(MF);
957  return FPOffset;
958  }
959  }
960  // Use the base pointer if we have one.
961  // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
962  // That can happen if we forced a base pointer for a large call frame.
963  if (RegInfo->hasBasePointer(MF)) {
964  FrameReg = RegInfo->getBaseRegister();
965  Offset -= SPAdj;
966  }
967  return Offset;
968 }
969 
970 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
972  const std::vector<CalleeSavedInfo> &CSI,
973  unsigned StmOpc, unsigned StrOpc,
974  bool NoGap,
975  bool(*Func)(unsigned, bool),
976  unsigned NumAlignedDPRCS2Regs,
977  unsigned MIFlags) const {
978  MachineFunction &MF = *MBB.getParent();
979  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
981 
982  DebugLoc DL;
983 
984  using RegAndKill = std::pair<unsigned, bool>;
985 
987  unsigned i = CSI.size();
988  while (i != 0) {
989  unsigned LastReg = 0;
990  for (; i != 0; --i) {
991  unsigned Reg = CSI[i-1].getReg();
992  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
993 
994  // D-registers in the aligned area DPRCS2 are NOT spilled here.
995  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
996  continue;
997 
998  const MachineRegisterInfo &MRI = MF.getRegInfo();
999  bool isLiveIn = MRI.isLiveIn(Reg);
1000  if (!isLiveIn && !MRI.isReserved(Reg))
1001  MBB.addLiveIn(Reg);
1002  // If NoGap is true, push consecutive registers and then leave the rest
1003  // for other instructions. e.g.
1004  // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1005  if (NoGap && LastReg && LastReg != Reg-1)
1006  break;
1007  LastReg = Reg;
1008  // Do not set a kill flag on values that are also marked as live-in. This
1009  // happens with the @llvm-returnaddress intrinsic and with arguments
1010  // passed in callee saved registers.
1011  // Omitting the kill flags is conservatively correct even if the live-in
1012  // is not used after all.
1013  Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1014  }
1015 
1016  if (Regs.empty())
1017  continue;
1018 
1019  llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1020  return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1021  });
1022 
1023  if (Regs.size() > 1 || StrOpc== 0) {
1024  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1025  .addReg(ARM::SP)
1026  .setMIFlags(MIFlags)
1027  .add(predOps(ARMCC::AL));
1028  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1029  MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1030  } else if (Regs.size() == 1) {
1031  BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1032  .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1033  .addReg(ARM::SP)
1034  .setMIFlags(MIFlags)
1035  .addImm(-4)
1036  .add(predOps(ARMCC::AL));
1037  }
1038  Regs.clear();
1039 
1040  // Put any subsequent vpush instructions before this one: they will refer to
1041  // higher register numbers so need to be pushed first in order to preserve
1042  // monotonicity.
1043  if (MI != MBB.begin())
1044  --MI;
1045  }
1046 }
1047 
1048 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1050  std::vector<CalleeSavedInfo> &CSI,
1051  unsigned LdmOpc, unsigned LdrOpc,
1052  bool isVarArg, bool NoGap,
1053  bool(*Func)(unsigned, bool),
1054  unsigned NumAlignedDPRCS2Regs) const {
1055  MachineFunction &MF = *MBB.getParent();
1056  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1059  DebugLoc DL;
1060  bool isTailCall = false;
1061  bool isInterrupt = false;
1062  bool isTrap = false;
1063  if (MBB.end() != MI) {
1064  DL = MI->getDebugLoc();
1065  unsigned RetOpcode = MI->getOpcode();
1066  isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1067  isInterrupt =
1068  RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1069  isTrap =
1070  RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1071  RetOpcode == ARM::tTRAP;
1072  }
1073 
1075  unsigned i = CSI.size();
1076  while (i != 0) {
1077  unsigned LastReg = 0;
1078  bool DeleteRet = false;
1079  for (; i != 0; --i) {
1080  CalleeSavedInfo &Info = CSI[i-1];
1081  unsigned Reg = Info.getReg();
1082  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1083 
1084  // The aligned reloads from area DPRCS2 are not inserted here.
1085  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1086  continue;
1087 
1088  if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1089  !isTrap && STI.hasV5TOps()) {
1090  if (MBB.succ_empty()) {
1091  Reg = ARM::PC;
1092  // Fold the return instruction into the LDM.
1093  DeleteRet = true;
1094  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1095  // We 'restore' LR into PC so it is not live out of the return block:
1096  // Clear Restored bit.
1097  Info.setRestored(false);
1098  } else
1099  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1100  }
1101 
1102  // If NoGap is true, pop consecutive registers and then leave the rest
1103  // for other instructions. e.g.
1104  // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1105  if (NoGap && LastReg && LastReg != Reg-1)
1106  break;
1107 
1108  LastReg = Reg;
1109  Regs.push_back(Reg);
1110  }
1111 
1112  if (Regs.empty())
1113  continue;
1114 
1115  llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1116  return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1117  });
1118 
1119  if (Regs.size() > 1 || LdrOpc == 0) {
1120  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1121  .addReg(ARM::SP)
1122  .add(predOps(ARMCC::AL));
1123  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1124  MIB.addReg(Regs[i], getDefRegState(true));
1125  if (DeleteRet) {
1126  if (MI != MBB.end()) {
1127  MIB.copyImplicitOps(*MI);
1128  MI->eraseFromParent();
1129  }
1130  }
1131  MI = MIB;
1132  } else if (Regs.size() == 1) {
1133  // If we adjusted the reg to PC from LR above, switch it back here. We
1134  // only do that for LDM.
1135  if (Regs[0] == ARM::PC)
1136  Regs[0] = ARM::LR;
1137  MachineInstrBuilder MIB =
1138  BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1139  .addReg(ARM::SP, RegState::Define)
1140  .addReg(ARM::SP);
1141  // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1142  // that refactoring is complete (eventually).
1143  if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1144  MIB.addReg(0);
1146  } else
1147  MIB.addImm(4);
1148  MIB.add(predOps(ARMCC::AL));
1149  }
1150  Regs.clear();
1151 
1152  // Put any subsequent vpop instructions after this one: they will refer to
1153  // higher register numbers so need to be popped afterwards.
1154  if (MI != MBB.end())
1155  ++MI;
1156  }
1157 }
1158 
1159 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1160 /// starting from d8. Also insert stack realignment code and leave the stack
1161 /// pointer pointing to the d8 spill slot.
1164  unsigned NumAlignedDPRCS2Regs,
1165  const std::vector<CalleeSavedInfo> &CSI,
1166  const TargetRegisterInfo *TRI) {
1167  MachineFunction &MF = *MBB.getParent();
1169  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1170  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1171  MachineFrameInfo &MFI = MF.getFrameInfo();
1172 
1173  // Mark the D-register spill slots as properly aligned. Since MFI computes
1174  // stack slot layout backwards, this can actually mean that the d-reg stack
1175  // slot offsets can be wrong. The offset for d8 will always be correct.
1176  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1177  unsigned DNum = CSI[i].getReg() - ARM::D8;
1178  if (DNum > NumAlignedDPRCS2Regs - 1)
1179  continue;
1180  int FI = CSI[i].getFrameIdx();
1181  // The even-numbered registers will be 16-byte aligned, the odd-numbered
1182  // registers will be 8-byte aligned.
1183  MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
1184 
1185  // The stack slot for D8 needs to be maximally aligned because this is
1186  // actually the point where we align the stack pointer. MachineFrameInfo
1187  // computes all offsets relative to the incoming stack pointer which is a
1188  // bit weird when realigning the stack. Any extra padding for this
1189  // over-alignment is not realized because the code inserted below adjusts
1190  // the stack pointer by numregs * 8 before aligning the stack pointer.
1191  if (DNum == 0)
1192  MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
1193  }
1194 
1195  // Move the stack pointer to the d8 spill slot, and align it at the same
1196  // time. Leave the stack slot address in the scratch register r4.
1197  //
1198  // sub r4, sp, #numregs * 8
1199  // bic r4, r4, #align - 1
1200  // mov sp, r4
1201  //
1202  bool isThumb = AFI->isThumbFunction();
1203  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1204  AFI->setShouldRestoreSPFromFP(true);
1205 
1206  // sub r4, sp, #numregs * 8
1207  // The immediate is <= 64, so it doesn't need any special encoding.
1208  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1209  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1210  .addReg(ARM::SP)
1211  .addImm(8 * NumAlignedDPRCS2Regs)
1212  .add(predOps(ARMCC::AL))
1213  .add(condCodeOp());
1214 
1215  unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
1216  // We must set parameter MustBeSingleInstruction to true, since
1217  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1218  // stack alignment. Luckily, this can always be done since all ARM
1219  // architecture versions that support Neon also support the BFC
1220  // instruction.
1221  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1222 
1223  // mov sp, r4
1224  // The stack pointer must be adjusted before spilling anything, otherwise
1225  // the stack slots could be clobbered by an interrupt handler.
1226  // Leave r4 live, it is used below.
1227  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1228  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1229  .addReg(ARM::R4)
1230  .add(predOps(ARMCC::AL));
1231  if (!isThumb)
1232  MIB.add(condCodeOp());
1233 
1234  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1235  // r4 holds the stack slot address.
1236  unsigned NextReg = ARM::D8;
1237 
1238  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1239  // The writeback is only needed when emitting two vst1.64 instructions.
1240  if (NumAlignedDPRCS2Regs >= 6) {
1241  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1242  &ARM::QQPRRegClass);
1243  MBB.addLiveIn(SupReg);
1244  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1245  .addReg(ARM::R4, RegState::Kill)
1246  .addImm(16)
1247  .addReg(NextReg)
1248  .addReg(SupReg, RegState::ImplicitKill)
1249  .add(predOps(ARMCC::AL));
1250  NextReg += 4;
1251  NumAlignedDPRCS2Regs -= 4;
1252  }
1253 
1254  // We won't modify r4 beyond this point. It currently points to the next
1255  // register to be spilled.
1256  unsigned R4BaseReg = NextReg;
1257 
1258  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1259  if (NumAlignedDPRCS2Regs >= 4) {
1260  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1261  &ARM::QQPRRegClass);
1262  MBB.addLiveIn(SupReg);
1263  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1264  .addReg(ARM::R4)
1265  .addImm(16)
1266  .addReg(NextReg)
1267  .addReg(SupReg, RegState::ImplicitKill)
1268  .add(predOps(ARMCC::AL));
1269  NextReg += 4;
1270  NumAlignedDPRCS2Regs -= 4;
1271  }
1272 
1273  // 16-byte aligned vst1.64 with 2 d-regs.
1274  if (NumAlignedDPRCS2Regs >= 2) {
1275  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1276  &ARM::QPRRegClass);
1277  MBB.addLiveIn(SupReg);
1278  BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1279  .addReg(ARM::R4)
1280  .addImm(16)
1281  .addReg(SupReg)
1282  .add(predOps(ARMCC::AL));
1283  NextReg += 2;
1284  NumAlignedDPRCS2Regs -= 2;
1285  }
1286 
1287  // Finally, use a vanilla vstr.64 for the odd last register.
1288  if (NumAlignedDPRCS2Regs) {
1289  MBB.addLiveIn(NextReg);
1290  // vstr.64 uses addrmode5 which has an offset scale of 4.
1291  BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1292  .addReg(NextReg)
1293  .addReg(ARM::R4)
1294  .addImm((NextReg - R4BaseReg) * 2)
1295  .add(predOps(ARMCC::AL));
1296  }
1297 
1298  // The last spill instruction inserted should kill the scratch register r4.
1299  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1300 }
1301 
1302 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1303 /// iterator to the following instruction.
1306  unsigned NumAlignedDPRCS2Regs) {
1307  // sub r4, sp, #numregs * 8
1308  // bic r4, r4, #align - 1
1309  // mov sp, r4
1310  ++MI; ++MI; ++MI;
1311  assert(MI->mayStore() && "Expecting spill instruction");
1312 
1313  // These switches all fall through.
1314  switch(NumAlignedDPRCS2Regs) {
1315  case 7:
1316  ++MI;
1317  assert(MI->mayStore() && "Expecting spill instruction");
1319  default:
1320  ++MI;
1321  assert(MI->mayStore() && "Expecting spill instruction");
1323  case 1:
1324  case 2:
1325  case 4:
1326  assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1327  ++MI;
1328  }
1329  return MI;
1330 }
1331 
1332 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1333 /// starting from d8. These instructions are assumed to execute while the
1334 /// stack is still aligned, unlike the code inserted by emitPopInst.
1337  unsigned NumAlignedDPRCS2Regs,
1338  const std::vector<CalleeSavedInfo> &CSI,
1339  const TargetRegisterInfo *TRI) {
1340  MachineFunction &MF = *MBB.getParent();
1342  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1343  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1344 
1345  // Find the frame index assigned to d8.
1346  int D8SpillFI = 0;
1347  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1348  if (CSI[i].getReg() == ARM::D8) {
1349  D8SpillFI = CSI[i].getFrameIdx();
1350  break;
1351  }
1352 
1353  // Materialize the address of the d8 spill slot into the scratch register r4.
1354  // This can be fairly complicated if the stack frame is large, so just use
1355  // the normal frame index elimination mechanism to do it. This code runs as
1356  // the initial part of the epilog where the stack and base pointers haven't
1357  // been changed yet.
1358  bool isThumb = AFI->isThumbFunction();
1359  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1360 
1361  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1362  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1363  .addFrameIndex(D8SpillFI)
1364  .addImm(0)
1365  .add(predOps(ARMCC::AL))
1366  .add(condCodeOp());
1367 
1368  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1369  unsigned NextReg = ARM::D8;
1370 
1371  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1372  if (NumAlignedDPRCS2Regs >= 6) {
1373  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1374  &ARM::QQPRRegClass);
1375  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1376  .addReg(ARM::R4, RegState::Define)
1378  .addImm(16)
1380  .add(predOps(ARMCC::AL));
1381  NextReg += 4;
1382  NumAlignedDPRCS2Regs -= 4;
1383  }
1384 
1385  // We won't modify r4 beyond this point. It currently points to the next
1386  // register to be spilled.
1387  unsigned R4BaseReg = NextReg;
1388 
1389  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1390  if (NumAlignedDPRCS2Regs >= 4) {
1391  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1392  &ARM::QQPRRegClass);
1393  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1394  .addReg(ARM::R4)
1395  .addImm(16)
1397  .add(predOps(ARMCC::AL));
1398  NextReg += 4;
1399  NumAlignedDPRCS2Regs -= 4;
1400  }
1401 
1402  // 16-byte aligned vld1.64 with 2 d-regs.
1403  if (NumAlignedDPRCS2Regs >= 2) {
1404  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1405  &ARM::QPRRegClass);
1406  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1407  .addReg(ARM::R4)
1408  .addImm(16)
1409  .add(predOps(ARMCC::AL));
1410  NextReg += 2;
1411  NumAlignedDPRCS2Regs -= 2;
1412  }
1413 
1414  // Finally, use a vanilla vldr.64 for the remaining odd register.
1415  if (NumAlignedDPRCS2Regs)
1416  BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1417  .addReg(ARM::R4)
1418  .addImm(2 * (NextReg - R4BaseReg))
1419  .add(predOps(ARMCC::AL));
1420 
1421  // Last store kills r4.
1422  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1423 }
1424 
1427  const std::vector<CalleeSavedInfo> &CSI,
1428  const TargetRegisterInfo *TRI) const {
1429  if (CSI.empty())
1430  return false;
1431 
1432  MachineFunction &MF = *MBB.getParent();
1434 
1435  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1436  unsigned PushOneOpc = AFI->isThumbFunction() ?
1437  ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1438  unsigned FltOpc = ARM::VSTMDDB_UPD;
1439  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1440  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1442  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1444  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1445  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1446 
1447  // The code above does not insert spill code for the aligned DPRCS2 registers.
1448  // The stack realignment code will be inserted between the push instructions
1449  // and these spills.
1450  if (NumAlignedDPRCS2Regs)
1451  emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1452 
1453  return true;
1454 }
1455 
1458  std::vector<CalleeSavedInfo> &CSI,
1459  const TargetRegisterInfo *TRI) const {
1460  if (CSI.empty())
1461  return false;
1462 
1463  MachineFunction &MF = *MBB.getParent();
1465  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1466  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1467 
1468  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1469  // registers. Do that here instead.
1470  if (NumAlignedDPRCS2Regs)
1471  emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1472 
1473  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1474  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1475  unsigned FltOpc = ARM::VLDMDIA_UPD;
1476  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1477  NumAlignedDPRCS2Regs);
1478  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1479  &isARMArea2Register, 0);
1480  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1481  &isARMArea1Register, 0);
1482 
1483  return true;
1484 }
1485 
1486 // FIXME: Make generic?
1488  const ARMBaseInstrInfo &TII) {
1489  unsigned FnSize = 0;
1490  for (auto &MBB : MF) {
1491  for (auto &MI : MBB)
1492  FnSize += TII.getInstSizeInBytes(MI);
1493  }
1494  if (MF.getJumpTableInfo())
1495  for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
1496  FnSize += Table.MBBs.size() * 4;
1497  FnSize += MF.getConstantPool()->getConstants().size() * 4;
1498  return FnSize;
1499 }
1500 
1501 /// estimateRSStackSizeLimit - Look at each instruction that references stack
1502 /// frames and return the stack size limit beyond which some of these
1503 /// instructions will require a scratch register during their expansion later.
1504 // FIXME: Move to TII?
1506  const TargetFrameLowering *TFI,
1507  bool &HasNonSPFrameIndex) {
1508  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1509  const ARMBaseInstrInfo &TII =
1510  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1512  unsigned Limit = (1 << 12) - 1;
1513  for (auto &MBB : MF) {
1514  for (auto &MI : MBB) {
1515  if (MI.isDebugInstr())
1516  continue;
1517  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1518  if (!MI.getOperand(i).isFI())
1519  continue;
1520 
1521  // When using ADDri to get the address of a stack object, 255 is the
1522  // largest offset guaranteed to fit in the immediate offset.
1523  if (MI.getOpcode() == ARM::ADDri) {
1524  Limit = std::min(Limit, (1U << 8) - 1);
1525  break;
1526  }
1527  // t2ADDri will not require an extra register, it can reuse the
1528  // destination.
1529  if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
1530  break;
1531 
1532  const MCInstrDesc &MCID = MI.getDesc();
1533  const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
1534  if (RegClass && !RegClass->contains(ARM::SP))
1535  HasNonSPFrameIndex = true;
1536 
1537  // Otherwise check the addressing mode.
1538  switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
1539  case ARMII::AddrMode_i12:
1540  case ARMII::AddrMode2:
1541  // Default 12 bit limit.
1542  break;
1543  case ARMII::AddrMode3:
1544  case ARMII::AddrModeT2_i8:
1545  Limit = std::min(Limit, (1U << 8) - 1);
1546  break;
1547  case ARMII::AddrMode5FP16:
1548  Limit = std::min(Limit, ((1U << 8) - 1) * 2);
1549  break;
1550  case ARMII::AddrMode5:
1553  Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1554  break;
1555  case ARMII::AddrModeT2_i12:
1556  // i12 supports only positive offset so these will be converted to
1557  // i8 opcodes. See llvm::rewriteT2FrameIndex.
1558  if (TFI->hasFP(MF) && AFI->hasStackFrame())
1559  Limit = std::min(Limit, (1U << 8) - 1);
1560  break;
1561  case ARMII::AddrMode4:
1562  case ARMII::AddrMode6:
1563  // Addressing modes 4 & 6 (load/store) instructions can't encode an
1564  // immediate offset for stack references.
1565  return 0;
1566  case ARMII::AddrModeT2_i7:
1567  Limit = std::min(Limit, ((1U << 7) - 1) * 1);
1568  break;
1570  Limit = std::min(Limit, ((1U << 7) - 1) * 2);
1571  break;
1573  Limit = std::min(Limit, ((1U << 7) - 1) * 4);
1574  break;
1575  default:
1576  llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
1577  }
1578  break; // At most one FI per instruction
1579  }
1580  }
1581  }
1582 
1583  return Limit;
1584 }
1585 
1586 // In functions that realign the stack, it can be an advantage to spill the
1587 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1588 // instructions take alignment hints that can improve performance.
1589 static void
1591  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1592  if (!SpillAlignedNEONRegs)
1593  return;
1594 
1595  // Naked functions don't spill callee-saved registers.
1596  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
1597  return;
1598 
1599  // We are planning to use NEON instructions vst1 / vld1.
1600  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
1601  return;
1602 
1603  // Don't bother if the default stack alignment is sufficiently high.
1604  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
1605  return;
1606 
1607  // Aligned spills require stack realignment.
1608  if (!static_cast<const ARMBaseRegisterInfo *>(
1609  MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
1610  return;
1611 
1612  // We always spill contiguous d-registers starting from d8. Count how many
1613  // needs spilling. The register allocator will almost always use the
1614  // callee-saved registers in order, but it can happen that there are holes in
1615  // the range. Registers above the hole will be spilled to the standard DPRCS
1616  // area.
1617  unsigned NumSpills = 0;
1618  for (; NumSpills < 8; ++NumSpills)
1619  if (!SavedRegs.test(ARM::D8 + NumSpills))
1620  break;
1621 
1622  // Don't do this for just one d-register. It's not worth it.
1623  if (NumSpills < 2)
1624  return;
1625 
1626  // Spill the first NumSpills D-registers after realigning the stack.
1627  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1628 
1629  // A scratch register is required for the vst1 / vld1 instructions.
1630  SavedRegs.set(ARM::R4);
1631 }
1632 
1634  BitVector &SavedRegs,
1635  RegScavenger *RS) const {
1636  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1637  // This tells PEI to spill the FP as if it is any other callee-save register
1638  // to take advantage the eliminateFrameIndex machinery. This also ensures it
1639  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1640  // to combine multiple loads / stores.
1641  bool CanEliminateFrame = true;
1642  bool CS1Spilled = false;
1643  bool LRSpilled = false;
1644  unsigned NumGPRSpills = 0;
1645  unsigned NumFPRSpills = 0;
1646  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1647  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1648  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1649  MF.getSubtarget().getRegisterInfo());
1650  const ARMBaseInstrInfo &TII =
1651  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1653  MachineFrameInfo &MFI = MF.getFrameInfo();
1656  (void)TRI; // Silence unused warning in non-assert builds.
1657  Register FramePtr = RegInfo->getFrameRegister(MF);
1658 
1659  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
1660  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
1661  // since it's not always possible to restore sp from fp in a single
1662  // instruction.
1663  // FIXME: It will be better just to find spare register here.
1664  if (AFI->isThumb2Function() &&
1665  (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1666  SavedRegs.set(ARM::R4);
1667 
1668  // If a stack probe will be emitted, spill R4 and LR, since they are
1669  // clobbered by the stack probe call.
1670  // This estimate should be a safe, conservative estimate. The actual
1671  // stack probe is enabled based on the size of the local objects;
1672  // this estimate also includes the varargs store size.
1673  if (STI.isTargetWindows() &&
1674  WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
1675  SavedRegs.set(ARM::R4);
1676  SavedRegs.set(ARM::LR);
1677  }
1678 
1679  if (AFI->isThumb1OnlyFunction()) {
1680  // Spill LR if Thumb1 function uses variable length argument lists.
1681  if (AFI->getArgRegsSaveSize() > 0)
1682  SavedRegs.set(ARM::LR);
1683 
1684  // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
1685  // requires stack alignment. We don't know for sure what the stack size
1686  // will be, but for this, an estimate is good enough. If there anything
1687  // changes it, it'll be a spill, which implies we've used all the registers
1688  // and so R4 is already used, so not marking it here will be OK.
1689  // FIXME: It will be better just to find spare register here.
1690  if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
1691  MFI.estimateStackSize(MF) > 508)
1692  SavedRegs.set(ARM::R4);
1693  }
1694 
1695  // See if we can spill vector registers to aligned stack.
1696  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
1697 
1698  // Spill the BasePtr if it's used.
1699  if (RegInfo->hasBasePointer(MF))
1700  SavedRegs.set(RegInfo->getBaseRegister());
1701 
1702  // Don't spill FP if the frame can be eliminated. This is determined
1703  // by scanning the callee-save registers to see if any is modified.
1704  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1705  for (unsigned i = 0; CSRegs[i]; ++i) {
1706  unsigned Reg = CSRegs[i];
1707  bool Spilled = false;
1708  if (SavedRegs.test(Reg)) {
1709  Spilled = true;
1710  CanEliminateFrame = false;
1711  }
1712 
1713  if (!ARM::GPRRegClass.contains(Reg)) {
1714  if (Spilled) {
1715  if (ARM::SPRRegClass.contains(Reg))
1716  NumFPRSpills++;
1717  else if (ARM::DPRRegClass.contains(Reg))
1718  NumFPRSpills += 2;
1719  else if (ARM::QPRRegClass.contains(Reg))
1720  NumFPRSpills += 4;
1721  }
1722  continue;
1723  }
1724 
1725  if (Spilled) {
1726  NumGPRSpills++;
1727 
1728  if (!STI.splitFramePushPop(MF)) {
1729  if (Reg == ARM::LR)
1730  LRSpilled = true;
1731  CS1Spilled = true;
1732  continue;
1733  }
1734 
1735  // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
1736  switch (Reg) {
1737  case ARM::LR:
1738  LRSpilled = true;
1740  case ARM::R0: case ARM::R1:
1741  case ARM::R2: case ARM::R3:
1742  case ARM::R4: case ARM::R5:
1743  case ARM::R6: case ARM::R7:
1744  CS1Spilled = true;
1745  break;
1746  default:
1747  break;
1748  }
1749  } else {
1750  if (!STI.splitFramePushPop(MF)) {
1751  UnspilledCS1GPRs.push_back(Reg);
1752  continue;
1753  }
1754 
1755  switch (Reg) {
1756  case ARM::R0: case ARM::R1:
1757  case ARM::R2: case ARM::R3:
1758  case ARM::R4: case ARM::R5:
1759  case ARM::R6: case ARM::R7:
1760  case ARM::LR:
1761  UnspilledCS1GPRs.push_back(Reg);
1762  break;
1763  default:
1764  UnspilledCS2GPRs.push_back(Reg);
1765  break;
1766  }
1767  }
1768  }
1769 
1770  bool ForceLRSpill = false;
1771  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1772  unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
1773  // Force LR to be spilled if the Thumb function size is > 2048. This enables
1774  // use of BL to implement far jump. If it turns out that it's not needed
1775  // then the branch fix up path will undo it.
1776  if (FnSize >= (1 << 11)) {
1777  CanEliminateFrame = false;
1778  ForceLRSpill = true;
1779  }
1780  }
1781 
1782  // If any of the stack slot references may be out of range of an immediate
1783  // offset, make sure a register (or a spill slot) is available for the
1784  // register scavenger. Note that if we're indexing off the frame pointer, the
1785  // effective stack size is 4 bytes larger since the FP points to the stack
1786  // slot of the previous FP. Also, if we have variable sized objects in the
1787  // function, stack slot references will often be negative, and some of
1788  // our instructions are positive-offset only, so conservatively consider
1789  // that case to want a spill slot (or register) as well. Similarly, if
1790  // the function adjusts the stack pointer during execution and the
1791  // adjustments aren't already part of our stack size estimate, our offset
1792  // calculations may be off, so be conservative.
1793  // FIXME: We could add logic to be more precise about negative offsets
1794  // and which instructions will need a scratch register for them. Is it
1795  // worth the effort and added fragility?
1796  unsigned EstimatedStackSize =
1797  MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
1798 
1799  // Determine biggest (positive) SP offset in MachineFrameInfo.
1800  int MaxFixedOffset = 0;
1801  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
1802  int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
1803  MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
1804  }
1805 
1806  bool HasFP = hasFP(MF);
1807  if (HasFP) {
1808  if (AFI->hasStackFrame())
1809  EstimatedStackSize += 4;
1810  } else {
1811  // If FP is not used, SP will be used to access arguments, so count the
1812  // size of arguments into the estimation.
1813  EstimatedStackSize += MaxFixedOffset;
1814  }
1815  EstimatedStackSize += 16; // For possible paddings.
1816 
1817  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
1818  bool HasNonSPFrameIndex = false;
1819  if (AFI->isThumb1OnlyFunction()) {
1820  // For Thumb1, don't bother to iterate over the function. The only
1821  // instruction that requires an emergency spill slot is a store to a
1822  // frame index.
1823  //
1824  // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
1825  // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
1826  // a 5-bit unsigned immediate.
1827  //
1828  // We could try to check if the function actually contains a tSTRspi
1829  // that might need the spill slot, but it's not really important.
1830  // Functions with VLAs or extremely large call frames are rare, and
1831  // if a function is allocating more than 1KB of stack, an extra 4-byte
1832  // slot probably isn't relevant.
1833  if (RegInfo->hasBasePointer(MF))
1834  EstimatedRSStackSizeLimit = (1U << 5) * 4;
1835  else
1836  EstimatedRSStackSizeLimit = (1U << 8) * 4;
1837  EstimatedRSFixedSizeLimit = (1U << 5) * 4;
1838  } else {
1839  EstimatedRSStackSizeLimit =
1840  estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
1841  EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
1842  }
1843  // Final estimate of whether sp or bp-relative accesses might require
1844  // scavenging.
1845  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
1846 
1847  // If the stack pointer moves and we don't have a base pointer, the
1848  // estimate logic doesn't work. The actual offsets might be larger when
1849  // we're constructing a call frame, or we might need to use negative
1850  // offsets from fp.
1851  bool HasMovingSP = MFI.hasVarSizedObjects() ||
1852  (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
1853  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
1854 
1855  // If we have a frame pointer, we assume arguments will be accessed
1856  // relative to the frame pointer. Check whether fp-relative accesses to
1857  // arguments require scavenging.
1858  //
1859  // We could do slightly better on Thumb1; in some cases, an sp-relative
1860  // offset would be legal even though an fp-relative offset is not.
1861  int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
1862  bool HasLargeArgumentList =
1863  HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
1864 
1865  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
1866  HasLargeArgumentList || HasNonSPFrameIndex;
1867  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
1868  << "; EstimatedStack: " << EstimatedStackSize
1869  << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
1870  << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
1871  if (BigFrameOffsets ||
1872  !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1873  AFI->setHasStackFrame(true);
1874 
1875  if (HasFP) {
1876  SavedRegs.set(FramePtr);
1877  // If the frame pointer is required by the ABI, also spill LR so that we
1878  // emit a complete frame record.
1879  if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
1880  SavedRegs.set(ARM::LR);
1881  LRSpilled = true;
1882  NumGPRSpills++;
1883  auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
1884  if (LRPos != UnspilledCS1GPRs.end())
1885  UnspilledCS1GPRs.erase(LRPos);
1886  }
1887  auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
1888  if (FPPos != UnspilledCS1GPRs.end())
1889  UnspilledCS1GPRs.erase(FPPos);
1890  NumGPRSpills++;
1891  if (FramePtr == ARM::R7)
1892  CS1Spilled = true;
1893  }
1894 
1895  // This is true when we inserted a spill for a callee-save GPR which is
1896  // not otherwise used by the function. This guarantees it is possible
1897  // to scavenge a register to hold the address of a stack slot. On Thumb1,
1898  // the register must be a valid operand to tSTRi, i.e. r4-r7. For other
1899  // subtargets, this is any GPR, i.e. r4-r11 or lr.
1900  //
1901  // If we don't insert a spill, we instead allocate an emergency spill
1902  // slot, which can be used by scavenging to spill an arbitrary register.
1903  //
1904  // We currently don't try to figure out whether any specific instruction
1905  // requires scavenging an additional register.
1906  bool ExtraCSSpill = false;
1907 
1908  if (AFI->isThumb1OnlyFunction()) {
1909  // For Thumb1-only targets, we need some low registers when we save and
1910  // restore the high registers (which aren't allocatable, but could be
1911  // used by inline assembly) because the push/pop instructions can not
1912  // access high registers. If necessary, we might need to push more low
1913  // registers to ensure that there is at least one free that can be used
1914  // for the saving & restoring, and preferably we should ensure that as
1915  // many as are needed are available so that fewer push/pop instructions
1916  // are required.
1917 
1918  // Low registers which are not currently pushed, but could be (r4-r7).
1919  SmallVector<unsigned, 4> AvailableRegs;
1920 
1921  // Unused argument registers (r0-r3) can be clobbered in the prologue for
1922  // free.
1923  int EntryRegDeficit = 0;
1924  for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
1925  if (!MF.getRegInfo().isLiveIn(Reg)) {
1926  --EntryRegDeficit;
1927  LLVM_DEBUG(dbgs()
1928  << printReg(Reg, TRI)
1929  << " is unused argument register, EntryRegDeficit = "
1930  << EntryRegDeficit << "\n");
1931  }
1932  }
1933 
1934  // Unused return registers can be clobbered in the epilogue for free.
1935  int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
1936  LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
1937  << " return regs used, ExitRegDeficit = "
1938  << ExitRegDeficit << "\n");
1939 
1940  int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
1941  LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
1942 
1943  // r4-r6 can be used in the prologue if they are pushed by the first push
1944  // instruction.
1945  for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
1946  if (SavedRegs.test(Reg)) {
1947  --RegDeficit;
1948  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1949  << " is saved low register, RegDeficit = "
1950  << RegDeficit << "\n");
1951  } else {
1952  AvailableRegs.push_back(Reg);
1953  LLVM_DEBUG(
1954  dbgs()
1955  << printReg(Reg, TRI)
1956  << " is non-saved low register, adding to AvailableRegs\n");
1957  }
1958  }
1959 
1960  // r7 can be used if it is not being used as the frame pointer.
1961  if (!HasFP) {
1962  if (SavedRegs.test(ARM::R7)) {
1963  --RegDeficit;
1964  LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
1965  << RegDeficit << "\n");
1966  } else {
1967  AvailableRegs.push_back(ARM::R7);
1968  LLVM_DEBUG(
1969  dbgs()
1970  << "%r7 is non-saved low register, adding to AvailableRegs\n");
1971  }
1972  }
1973 
1974  // Each of r8-r11 needs to be copied to a low register, then pushed.
1975  for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
1976  if (SavedRegs.test(Reg)) {
1977  ++RegDeficit;
1978  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1979  << " is saved high register, RegDeficit = "
1980  << RegDeficit << "\n");
1981  }
1982  }
1983 
1984  // LR can only be used by PUSH, not POP, and can't be used at all if the
1985  // llvm.returnaddress intrinsic is used. This is only worth doing if we
1986  // are more limited at function entry than exit.
1987  if ((EntryRegDeficit > ExitRegDeficit) &&
1988  !(MF.getRegInfo().isLiveIn(ARM::LR) &&
1990  if (SavedRegs.test(ARM::LR)) {
1991  --RegDeficit;
1992  LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
1993  << RegDeficit << "\n");
1994  } else {
1995  AvailableRegs.push_back(ARM::LR);
1996  LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
1997  }
1998  }
1999 
2000  // If there are more high registers that need pushing than low registers
2001  // available, push some more low registers so that we can use fewer push
2002  // instructions. This might not reduce RegDeficit all the way to zero,
2003  // because we can only guarantee that r4-r6 are available, but r8-r11 may
2004  // need saving.
2005  LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2006  for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2007  unsigned Reg = AvailableRegs.pop_back_val();
2008  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2009  << " to make up reg deficit\n");
2010  SavedRegs.set(Reg);
2011  NumGPRSpills++;
2012  CS1Spilled = true;
2013  assert(!MRI.isReserved(Reg) && "Should not be reserved");
2014  if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2015  ExtraCSSpill = true;
2016  UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2017  if (Reg == ARM::LR)
2018  LRSpilled = true;
2019  }
2020  LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2021  << "\n");
2022  }
2023 
2024  // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2025  // restore LR in that case.
2026  bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2027 
2028  // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2029  // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2030  if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2031  SavedRegs.set(ARM::LR);
2032  NumGPRSpills++;
2034  LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2035  if (LRPos != UnspilledCS1GPRs.end())
2036  UnspilledCS1GPRs.erase(LRPos);
2037 
2038  ForceLRSpill = false;
2039  if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2040  !AFI->isThumb1OnlyFunction())
2041  ExtraCSSpill = true;
2042  }
2043 
2044  // If stack and double are 8-byte aligned and we are spilling an odd number
2045  // of GPRs, spill one extra callee save GPR so we won't have to pad between
2046  // the integer and double callee save areas.
2047  LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2048  unsigned TargetAlign = getStackAlignment();
2049  if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
2050  if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2051  for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
2052  unsigned Reg = UnspilledCS1GPRs[i];
2053  // Don't spill high register if the function is thumb. In the case of
2054  // Windows on ARM, accept R11 (frame pointer)
2055  if (!AFI->isThumbFunction() ||
2056  (STI.isTargetWindows() && Reg == ARM::R11) ||
2057  isARMLowRegister(Reg) ||
2058  (Reg == ARM::LR && !ExpensiveLRRestore)) {
2059  SavedRegs.set(Reg);
2060  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2061  << " to make up alignment\n");
2062  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2063  !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2064  ExtraCSSpill = true;
2065  break;
2066  }
2067  }
2068  } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2069  unsigned Reg = UnspilledCS2GPRs.front();
2070  SavedRegs.set(Reg);
2071  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2072  << " to make up alignment\n");
2073  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2074  ExtraCSSpill = true;
2075  }
2076  }
2077 
2078  // Estimate if we might need to scavenge a register at some point in order
2079  // to materialize a stack offset. If so, either spill one additional
2080  // callee-saved register or reserve a special spill slot to facilitate
2081  // register scavenging. Thumb1 needs a spill slot for stack pointer
2082  // adjustments also, even when the frame itself is small.
2083  if (BigFrameOffsets && !ExtraCSSpill) {
2084  // If any non-reserved CS register isn't spilled, just spill one or two
2085  // extra. That should take care of it!
2086  unsigned NumExtras = TargetAlign / 4;
2087  SmallVector<unsigned, 2> Extras;
2088  while (NumExtras && !UnspilledCS1GPRs.empty()) {
2089  unsigned Reg = UnspilledCS1GPRs.back();
2090  UnspilledCS1GPRs.pop_back();
2091  if (!MRI.isReserved(Reg) &&
2092  (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2093  Extras.push_back(Reg);
2094  NumExtras--;
2095  }
2096  }
2097  // For non-Thumb1 functions, also check for hi-reg CS registers
2098  if (!AFI->isThumb1OnlyFunction()) {
2099  while (NumExtras && !UnspilledCS2GPRs.empty()) {
2100  unsigned Reg = UnspilledCS2GPRs.back();
2101  UnspilledCS2GPRs.pop_back();
2102  if (!MRI.isReserved(Reg)) {
2103  Extras.push_back(Reg);
2104  NumExtras--;
2105  }
2106  }
2107  }
2108  if (NumExtras == 0) {
2109  for (unsigned Reg : Extras) {
2110  SavedRegs.set(Reg);
2111  if (!MRI.isPhysRegUsed(Reg))
2112  ExtraCSSpill = true;
2113  }
2114  }
2115  if (!ExtraCSSpill && RS) {
2116  // Reserve a slot closest to SP or frame pointer.
2117  LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2118  const TargetRegisterClass &RC = ARM::GPRRegClass;
2119  unsigned Size = TRI->getSpillSize(RC);
2120  unsigned Align = TRI->getSpillAlignment(RC);
2121  RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2122  }
2123  }
2124  }
2125 
2126  if (ForceLRSpill) {
2127  SavedRegs.set(ARM::LR);
2128  AFI->setLRIsSpilledForFarJump(true);
2129  }
2130  AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2131 
2132  // If we have the "returned" parameter attribute which guarantees that we
2133  // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2134  // record that fact for IPRA.
2135  if (AFI->getPreservesR0())
2136  SavedRegs.set(ARM::R0);
2137 }
2138 
2139 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2142  const ARMBaseInstrInfo &TII =
2143  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2144  if (!hasReservedCallFrame(MF)) {
2145  // If we have alloca, convert as follows:
2146  // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2147  // ADJCALLSTACKUP -> add, sp, sp, amount
2148  MachineInstr &Old = *I;
2149  DebugLoc dl = Old.getDebugLoc();
2150  unsigned Amount = TII.getFrameSize(Old);
2151  if (Amount != 0) {
2152  // We need to keep the stack aligned properly. To do this, we round the
2153  // amount of space needed for the outgoing arguments up to the next
2154  // alignment boundary.
2155  Amount = alignSPAdjust(Amount);
2156 
2158  assert(!AFI->isThumb1OnlyFunction() &&
2159  "This eliminateCallFramePseudoInstr does not support Thumb1!");
2160  bool isARM = !AFI->isThumbFunction();
2161 
2162  // Replace the pseudo instruction with a new instruction...
2163  unsigned Opc = Old.getOpcode();
2164  int PIdx = Old.findFirstPredOperandIdx();
2165  ARMCC::CondCodes Pred =
2166  (PIdx == -1) ? ARMCC::AL
2167  : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
2168  unsigned PredReg = TII.getFramePred(Old);
2169  if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2170  emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2171  Pred, PredReg);
2172  } else {
2173  assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2174  emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2175  Pred, PredReg);
2176  }
2177  }
2178  }
2179  return MBB.erase(I);
2180 }
2181 
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
///
/// \param Value the requested stack size (any 32-bit value).
/// \return the smallest ARM-encodable immediate >= \p Value.
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  // Normalize so the most significant set bit lands in the top two bit
  // positions; Shifted records how far we moved (always even, matching the
  // even-rotation constraint of ARM modified immediates).
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  // Round up: if any bits below the top byte are set, bump the top byte.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  // If the bump overflowed 8 bits, fold back into an encodable pattern.
  if (Value & 0x0000100)
    Value = Value & 0x000001FC;

  // Undo the normalization shift to place the byte at its original scale.
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}
2210 
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit. Frames smaller than this slack can therefore be checked with a
// plain SP-vs-limit compare instead of computing SP minus the frame size.
static const uint64_t kSplitStackAvailable = 256;
2214 
2215 // Adjust the function prologue to enable split stacks. This currently only
2216 // supports android and linux.
2217 //
2218 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2219 // must be well defined in order to allow for consistent implementations of the
2220 // __morestack helper function. The ABI is also not a normal ABI in that it
2221 // doesn't follow the normal calling conventions because this allows the
2222 // prologue of each function to be optimized further.
2223 //
2224 // Currently, the ABI looks like (when calling __morestack)
2225 //
2226 // * r4 holds the minimum stack size requested for this function call
2227 // * r5 holds the stack size of the arguments to the function
2228 // * the beginning of the function is 3 instructions after the call to
2229 // __morestack
2230 //
2231 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2232 // place the arguments on to the new stack, and the 3-instruction knowledge to
2233 // jump directly to the body of the function when working on the new stack.
2234 //
2235 // An old (and possibly no longer compatible) implementation of __morestack for
2236 // ARM can be found at [1].
2237 //
2238 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
2240  MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2241  unsigned Opcode;
2242  unsigned CFIIndex;
2243  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2244  bool Thumb = ST->isThumb();
2245 
2246  // Sadly, this currently doesn't support varargs, platforms other than
2247  // android/linux. Note that thumb1/thumb2 are support for android/linux.
2248  if (MF.getFunction().isVarArg())
2249  report_fatal_error("Segmented stacks do not support vararg functions.");
2250  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2251  report_fatal_error("Segmented stacks not supported on this platform.");
2252 
2253  MachineFrameInfo &MFI = MF.getFrameInfo();
2254  MachineModuleInfo &MMI = MF.getMMI();
2255  MCContext &Context = MMI.getContext();
2256  const MCRegisterInfo *MRI = Context.getRegisterInfo();
2257  const ARMBaseInstrInfo &TII =
2258  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2259  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
2260  DebugLoc DL;
2261 
2262  uint64_t StackSize = MFI.getStackSize();
2263 
2264  // Do not generate a prologue for leaf functions with a stack of size zero.
2265  // For non-leaf functions we have to allow for the possibility that the
2266  // callis to a non-split function, as in PR37807. This function could also
2267  // take the address of a non-split function. When the linker tries to adjust
2268  // its non-existent prologue, it would fail with an error. Mark the object
2269  // file so that such failures are not errors. See this Go language bug-report
2270  // https://go-review.googlesource.com/c/go/+/148819/
2271  if (StackSize == 0 && !MFI.hasTailCall()) {
2272  MF.getMMI().setHasNosplitStack(true);
2273  return;
2274  }
2275 
2276  // Use R4 and R5 as scratch registers.
2277  // We save R4 and R5 before use and restore them before leaving the function.
2278  unsigned ScratchReg0 = ARM::R4;
2279  unsigned ScratchReg1 = ARM::R5;
2280  uint64_t AlignedStackSize;
2281 
2282  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2283  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2284  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
2287 
2288  // Grab everything that reaches PrologueMBB to update there liveness as well.
2289  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2291  WalkList.push_back(&PrologueMBB);
2292 
2293  do {
2294  MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2295  for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2296  if (BeforePrologueRegion.insert(PredBB).second)
2297  WalkList.push_back(PredBB);
2298  }
2299  } while (!WalkList.empty());
2300 
2301  // The order in that list is important.
2302  // The blocks will all be inserted before PrologueMBB using that order.
2303  // Therefore the block that should appear first in the CFG should appear
2304  // first in the list.
2305  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2306  PostStackMBB};
2307 
2308  for (MachineBasicBlock *B : AddedBlocks)
2309  BeforePrologueRegion.insert(B);
2310 
2311  for (const auto &LI : PrologueMBB.liveins()) {
2312  for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2313  PredBB->addLiveIn(LI);
2314  }
2315 
2316  // Remove the newly added blocks from the list, since we know
2317  // we do not have to do the following updates for them.
2318  for (MachineBasicBlock *B : AddedBlocks) {
2319  BeforePrologueRegion.erase(B);
2320  MF.insert(PrologueMBB.getIterator(), B);
2321  }
2322 
2323  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2324  // Make sure the LiveIns are still sorted and unique.
2325  MBB->sortUniqueLiveIns();
2326  // Replace the edges to PrologueMBB by edges to the sequences
2327  // we are about to add.
2328  MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2329  }
2330 
2331  // The required stack size that is aligned to ARM constant criterion.
2332  AlignedStackSize = alignToARMConstant(StackSize);
2333 
2334  // When the frame size is less than 256 we just compare the stack
2335  // boundary directly to the value of the stack pointer, per gcc.
2336  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
2337 
2338  // We will use two of the callee save registers as scratch registers so we
2339  // need to save those registers onto the stack.
2340  // We will use SR0 to hold stack limit and SR1 to hold the stack size
2341  // requested and arguments for __morestack().
2342  // SR0: Scratch Register #0
2343  // SR1: Scratch Register #1
2344  // push {SR0, SR1}
2345  if (Thumb) {
2346  BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
2347  .add(predOps(ARMCC::AL))
2348  .addReg(ScratchReg0)
2349  .addReg(ScratchReg1);
2350  } else {
2351  BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
2352  .addReg(ARM::SP, RegState::Define)
2353  .addReg(ARM::SP)
2354  .add(predOps(ARMCC::AL))
2355  .addReg(ScratchReg0)
2356  .addReg(ScratchReg1);
2357  }
2358 
2359  // Emit the relevant DWARF information about the change in stack pointer as
2360  // well as where to find both r4 and r5 (the callee-save registers)
2361  CFIIndex =
2363  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2364  .addCFIIndex(CFIIndex);
2366  nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
2367  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2368  .addCFIIndex(CFIIndex);
2370  nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
2371  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2372  .addCFIIndex(CFIIndex);
2373 
2374  // mov SR1, sp
2375  if (Thumb) {
2376  BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
2377  .addReg(ARM::SP)
2378  .add(predOps(ARMCC::AL));
2379  } else if (CompareStackPointer) {
2380  BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
2381  .addReg(ARM::SP)
2382  .add(predOps(ARMCC::AL))
2383  .add(condCodeOp());
2384  }
2385 
2386  // sub SR1, sp, #StackSize
2387  if (!CompareStackPointer && Thumb) {
2388  BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
2389  .add(condCodeOp())
2390  .addReg(ScratchReg1)
2391  .addImm(AlignedStackSize)
2392  .add(predOps(ARMCC::AL));
2393  } else if (!CompareStackPointer) {
2394  BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
2395  .addReg(ARM::SP)
2396  .addImm(AlignedStackSize)
2397  .add(predOps(ARMCC::AL))
2398  .add(condCodeOp());
2399  }
2400 
2401  if (Thumb && ST->isThumb1Only()) {
2402  unsigned PCLabelId = ARMFI->createPICLabelUId();
2404  MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
2406  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
2407 
2408  // ldr SR0, [pc, offset(STACK_LIMIT)]
2409  BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
2410  .addConstantPoolIndex(CPI)
2411  .add(predOps(ARMCC::AL));
2412 
2413  // ldr SR0, [SR0]
2414  BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
2415  .addReg(ScratchReg0)
2416  .addImm(0)
2417  .add(predOps(ARMCC::AL));
2418  } else {
2419  // Get TLS base address from the coprocessor
2420  // mrc p15, #0, SR0, c13, c0, #3
2421  BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
2422  .addImm(15)
2423  .addImm(0)
2424  .addImm(13)
2425  .addImm(0)
2426  .addImm(3)
2427  .add(predOps(ARMCC::AL));
2428 
2429  // Use the last tls slot on android and a private field of the TCP on linux.
2430  assert(ST->isTargetAndroid() || ST->isTargetLinux());
2431  unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
2432 
2433  // Get the stack limit from the right offset
2434  // ldr SR0, [sr0, #4 * TlsOffset]
2435  BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
2436  .addReg(ScratchReg0)
2437  .addImm(4 * TlsOffset)
2438  .add(predOps(ARMCC::AL));
2439  }
2440 
2441  // Compare stack limit with stack size requested.
2442  // cmp SR0, SR1
2443  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
2444  BuildMI(GetMBB, DL, TII.get(Opcode))
2445  .addReg(ScratchReg0)
2446  .addReg(ScratchReg1)
2447  .add(predOps(ARMCC::AL));
2448 
2449  // This jump is taken if StackLimit < SP - stack required.
2450  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
2451  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
2452  .addImm(ARMCC::LO)
2453  .addReg(ARM::CPSR);
2454 
2455 
2456  // Calling __morestack(StackSize, Size of stack arguments).
2457  // __morestack knows that the stack size requested is in SR0(r4)
2458  // and amount size of stack arguments is in SR1(r5).
2459 
2460  // Pass first argument for the __morestack by Scratch Register #0.
2461  // The amount size of stack required
2462  if (Thumb) {
2463  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
2464  .add(condCodeOp())
2465  .addImm(AlignedStackSize)
2466  .add(predOps(ARMCC::AL));
2467  } else {
2468  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
2469  .addImm(AlignedStackSize)
2470  .add(predOps(ARMCC::AL))
2471  .add(condCodeOp());
2472  }
2473  // Pass second argument for the __morestack by Scratch Register #1.
2474  // The amount size of stack consumed to save function arguments.
2475  if (Thumb) {
2476  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
2477  .add(condCodeOp())
2478  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2479  .add(predOps(ARMCC::AL));
2480  } else {
2481  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
2482  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2483  .add(predOps(ARMCC::AL))
2484  .add(condCodeOp());
2485  }
2486 
2487  // push {lr} - Save return address of this function.
2488  if (Thumb) {
2489  BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
2490  .add(predOps(ARMCC::AL))
2491  .addReg(ARM::LR);
2492  } else {
2493  BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
2494  .addReg(ARM::SP, RegState::Define)
2495  .addReg(ARM::SP)
2496  .add(predOps(ARMCC::AL))
2497  .addReg(ARM::LR);
2498  }
2499 
2500  // Emit the DWARF info about the change in stack as well as where to find the
2501  // previous link register
2502  CFIIndex =
2504  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2505  .addCFIIndex(CFIIndex);
2507  nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
2508  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2509  .addCFIIndex(CFIIndex);
2510 
2511  // Call __morestack().
2512  if (Thumb) {
2513  BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
2514  .add(predOps(ARMCC::AL))
2515  .addExternalSymbol("__morestack");
2516  } else {
2517  BuildMI(AllocMBB, DL, TII.get(ARM::BL))
2518  .addExternalSymbol("__morestack");
2519  }
2520 
2521  // pop {lr} - Restore return address of this original function.
2522  if (Thumb) {
2523  if (ST->isThumb1Only()) {
2524  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2525  .add(predOps(ARMCC::AL))
2526  .addReg(ScratchReg0);
2527  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
2528  .addReg(ScratchReg0)
2529  .add(predOps(ARMCC::AL));
2530  } else {
2531  BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
2532  .addReg(ARM::LR, RegState::Define)
2533  .addReg(ARM::SP, RegState::Define)
2534  .addReg(ARM::SP)
2535  .addImm(4)
2536  .add(predOps(ARMCC::AL));
2537  }
2538  } else {
2539  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2540  .addReg(ARM::SP, RegState::Define)
2541  .addReg(ARM::SP)
2542  .add(predOps(ARMCC::AL))
2543  .addReg(ARM::LR);
2544  }
2545 
2546  // Restore SR0 and SR1 in case of __morestack() was called.
2547  // __morestack() will skip PostStackMBB block so we need to restore
2548  // scratch registers from here.
2549  // pop {SR0, SR1}
2550  if (Thumb) {
2551  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2552  .add(predOps(ARMCC::AL))
2553  .addReg(ScratchReg0)
2554  .addReg(ScratchReg1);
2555  } else {
2556  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2557  .addReg(ARM::SP, RegState::Define)
2558  .addReg(ARM::SP)
2559  .add(predOps(ARMCC::AL))
2560  .addReg(ScratchReg0)
2561  .addReg(ScratchReg1);
2562  }
2563 
2564  // Update the CFA offset now that we've popped
2565  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2566  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2567  .addCFIIndex(CFIIndex);
2568 
2569  // Return from this function.
2570  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
2571 
2572  // Restore SR0 and SR1 in case of __morestack() was not called.
2573  // pop {SR0, SR1}
2574  if (Thumb) {
2575  BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
2576  .add(predOps(ARMCC::AL))
2577  .addReg(ScratchReg0)
2578  .addReg(ScratchReg1);
2579  } else {
2580  BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
2581  .addReg(ARM::SP, RegState::Define)
2582  .addReg(ARM::SP)
2583  .add(predOps(ARMCC::AL))
2584  .addReg(ScratchReg0)
2585  .addReg(ScratchReg1);
2586  }
2587 
2588  // Update the CFA offset now that we've popped
2589  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2590  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2591  .addCFIIndex(CFIIndex);
2592 
2593  // Tell debuggers that r4 and r5 are now the same as they were in the
2594  // previous function, that they're the "Same Value".
2596  nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
2597  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2598  .addCFIIndex(CFIIndex);
2600  nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
2601  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2602  .addCFIIndex(CFIIndex);
2603 
2604  // Organizing MBB lists
2605  PostStackMBB->addSuccessor(&PrologueMBB);
2606 
2607  AllocMBB->addSuccessor(PostStackMBB);
2608 
2609  GetMBB->addSuccessor(PostStackMBB);
2610  GetMBB->addSuccessor(AllocMBB);
2611 
2612  McrMBB->addSuccessor(GetMBB);
2613 
2614  PrevStackMBB->addSuccessor(McrMBB);
2615 
2616 #ifdef EXPENSIVE_CHECKS
2617  MF.verify();
2618 #endif
2619 }
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:176
const MachineInstrBuilder & add(const MachineOperand &MO) const
BitVector & set()
Definition: BitVector.h:397
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
#define R4(n)
bool contains(unsigned Reg) const
Return true if the specified register is included in this register class.
LLVMContext & Context
bool isReserved(Register PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
int findFirstPredOperandIdx() const
Find the index of the first operand in the operand list that is used to represent the predicate...
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
bool isThumb() const
Definition: ARMSubtarget.h:759
This class represents lattice values for constants.
Definition: AllocatorList.h:23
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:384
ARMConstantPoolValue - ARM specific constantpool value.
LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:179
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset)
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:507
unsigned Reg
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
Register getFrameRegister(const MachineFunction &MF) const override
bool hasV7Ops() const
Definition: ARMSubtarget.h:574
bool test(unsigned Idx) const
Definition: BitVector.h:501
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
unsigned second
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
F(f)
MachineModuleInfo & getMMI() const
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
bool isThumb1Only() const
Definition: ARMSubtarget.h:761
#define R2(n)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void setGPRCalleeSavedArea2Offset(unsigned o)
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static MCCFIInstruction createDefCfaOffset(MCSymbol *L, int Offset)
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:494
static bool isThumb(const MCSubtargetInfo &STI)
unsigned getSpillSize(const TargetRegisterClass &RC) const
Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...
return AArch64::GPR64RegClass contains(Reg)
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:525
static const uint64_t kSplitStackAvailable
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:156
bool isTargetELF() const
Definition: ARMSubtarget.h:704
unsigned getSpillAlignment(const TargetRegisterClass &RC) const
Return the minimum required alignment in bytes for a spill slot for a register of this class...
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to &#39;Old&#39;, change the code and CFG so that it branches to &#39;N...
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
unsigned getFramePred(const MachineInstr &MI) const
Returns predicate register associated with the given frame instruction.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:413
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required, we reserve argument space for call sites in the function immediately on entry to the current function.
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
This file contains the simple types necessary to represent the attributes associated with functions a...
static int sizeOfSPAdjustment(const MachineInstr &MI)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
bool keepFramePointer(const MachineFunction &MF) const override
Return true if the target wants to keep the frame pointer regardless of the function attribute "frame...
void setDPRCalleeSavedAreaOffset(unsigned o)
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
unsigned getArgRegsSaveSize() const
void setFramePtrSpillOffset(unsigned o)
Context object for machine code objects.
Definition: MCContext.h:65
int getDwarfRegNum(MCRegister RegNum, bool isEH) const
Map a target register to an equivalent dwarf register number.
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
unsigned getDPRCalleeSavedAreaSize() const
bool splitFramePushPop(const MachineFunction &MF) const
Returns true if the frame setup is split into two separate pushes (first r0-r7,lr then r8-r11)...
Definition: ARMSubtarget.h:781
void setDPRCalleeSavedGapSize(unsigned s)
unsigned getNumAlignedDPRCS2Regs() const
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
const MCContext & getContext() const
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment. ...
bool hasV6T2Ops() const
Definition: ARMSubtarget.h:573
virtual const TargetInstrInfo * getInstrInfo() const
static bool isARMArea1Register(unsigned Reg, bool isIOS)
isARMArea1Register - Returns true if the register is a low register (r0-r7) or a stack/pc register th...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1231
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
TargetInstrInfo - Interface to description of machine instruction set.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned getDefRegState(bool B)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register)
.cfi_same_value Current value of Register is the same as in the previous frame.
Definition: MCDwarf.h:552
static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs)
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, int SPAdj) const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
This file declares the machine register scavenger class.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, std::vector< CalleeSavedInfo > &CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static MCCFIInstruction createDefCfa(MCSymbol *L, unsigned Register, int Offset)
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it...
Definition: MCDwarf.h:480
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool cannotEliminateFrame(const MachineFunction &MF) const
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register)
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:487
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Returns the size of the specified MachineInstr.
static unsigned estimateRSStackSizeLimit(MachineFunction &MF, const TargetFrameLowering *TFI, bool &HasNonSPFrameIndex)
estimateRSStackSizeLimit - Look at each instruction that references stack frames and return the stack...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
unsigned getGPRCalleeSavedArea1Size() const
int getStackProtectorIndex() const
Return the index for the stack protector object.
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
constexpr double e
Definition: MathExtras.h:57
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool hasBasePointer(const MachineFunction &MF) const
void setGPRCalleeSavedArea2Size(unsigned s)
static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:806
unsigned getReturnRegsCount() const
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, const std::vector< CalleeSavedInfo > &CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8...
self_iterator getIterator()
Definition: ilist_node.h:81
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
iterator_range< pred_iterator > predecessors()
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
const ARMFrameLowering * getFrameLowering() const override
Definition: ARMSubtarget.h:533
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
uint16_t getEncodingValue(MCRegister RegNo) const
Returns the encoding for RegNo.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materializea des...
iterator erase(const_iterator CI)
Definition: SmallVector.h:434
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
size_t size() const
Definition: SmallVector.h:52
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
unsigned first
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:492
int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
unsigned getDPRCalleeSavedGapSize() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, const std::vector< CalleeSavedInfo > &CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8...
#define R6(n)
void setGPRCalleeSavedArea1Size(unsigned s)
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
bool isTargetLinux() const
Definition: ARMSubtarget.h:698
static uint64_t add(uint64_t LeftOp, uint64_t RightOp)
Definition: FileCheck.cpp:215
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static bool isARMArea2Register(unsigned Reg, bool isIOS)
bool isTargetAndroid() const
Definition: ARMSubtarget.h:745
Information about stack frame layout on the target.
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const unsigned Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
void setOffsetAdjustment(int Adj)
Set the correction for frame offsets.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
CodeModel::Model getCodeModel() const
Returns the code model.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
ARMFrameLowering(const ARMSubtarget &sti)
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
unsigned getReturnOpcode() const
Returns the correct return opcode for the current feature set.
Definition: ARMSubtarget.h:851
unsigned getArgumentStackSize() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
static bool isPopOpcode(int Opc)
bool isLiveIn(unsigned Reg) const
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
unsigned getGPRCalleeSavedArea2Size() const
void setGPRCalleeSavedArea1Offset(unsigned o)
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg...
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:220
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
TargetOptions Options
#define I(x, y, z)
Definition: MD5.cpp:58
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:537
virtual const TargetFrameLowering * getFrameLowering() const
void setDPRCalleeSavedAreaSize(unsigned s)
int getOffsetAdjustment() const
Return the correction for frame offsets.
uint32_t Size
Definition: Profile.cpp:46
bool hasV5TOps() const
Definition: ARMSubtarget.h:568
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
iterator_range< livein_iterator > liveins() const
static bool isARMArea3Register(unsigned Reg, bool isIOS)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:320
bool needsStackRealignment(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal cal...
LLVM Value Representation.
Definition: Value.h:74
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector< CalleeSavedInfo > &CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
unsigned getFramePtrSpillOffset() const
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
bool isTargetWindows() const
Definition: ARMSubtarget.h:701
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
IRTranslator LLVM IR MI
static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII, const MCPhysReg *CSRegs)
BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const ARMSubtarget & STI
Register getReg() const
getReg - Returns the register number.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
unsigned getReg() const
bool hasTailCall() const
Returns true if the function contains a tail call.
void setObjectAlignment(int ObjectIdx, unsigned Align)
setObjectAlignment - Change the alignment of the specified stack object.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This class contains meta information specific to a module.