LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64FrameLowering.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2018-02-23 15:42:53
              Hit    Total    Coverage
Lines:        399    404      98.8 %
Functions:    24     24       100.0 %

          Line data    Source code
       1             : //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
       10             : // This file contains the AArch64 implementation of the TargetFrameLowering class.
      11             : //
      12             : // On AArch64, stack frames are structured as follows:
      13             : //
      14             : // The stack grows downward.
      15             : //
       16             : // All of the individual frame areas shown below are optional, i.e. it's
       17             : // possible to create a function such that a particular area isn't present
       18             : // in its frame.
      19             : //
      20             : // At function entry, the "frame" looks as follows:
      21             : //
      22             : // |                                   | Higher address
      23             : // |-----------------------------------|
      24             : // |                                   |
      25             : // | arguments passed on the stack     |
      26             : // |                                   |
      27             : // |-----------------------------------| <- sp
      28             : // |                                   | Lower address
      29             : //
      30             : //
      31             : // After the prologue has run, the frame has the following general structure.
       32             : // Note that this doesn't depict the case where a red-zone is used. Also,
       33             : // technically the last frame area (VLAs) doesn't get created until later,
       34             : // in the main function body, after the prologue has run. However, it's
       35             : // depicted here for completeness.
      36             : //
      37             : // |                                   | Higher address
      38             : // |-----------------------------------|
      39             : // |                                   |
      40             : // | arguments passed on the stack     |
      41             : // |                                   |
      42             : // |-----------------------------------|
      43             : // |                                   |
      44             : // | (Win64 only) varargs from reg     |
      45             : // |                                   |
      46             : // |-----------------------------------|
      47             : // |                                   |
      48             : // | prev_fp, prev_lr                  |
      49             : // | (a.k.a. "frame record")           |
      50             : // |-----------------------------------| <- fp(=x29)
      51             : // |                                   |
      52             : // | other callee-saved registers      |
      53             : // |                                   |
      54             : // |-----------------------------------|
      55             : // |.empty.space.to.make.part.below....|
      56             : // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
      57             : // |.the.standard.16-byte.alignment....|  compile time; if present)
      58             : // |-----------------------------------|
      59             : // |                                   |
      60             : // | local variables of fixed size     |
      61             : // | including spill slots             |
       62             : // |-----------------------------------| <- bp (not defined by ABI,
      63             : // |.variable-sized.local.variables....|       LLVM chooses X19)
      64             : // |.(VLAs)............................| (size of this area is unknown at
      65             : // |...................................|  compile time)
      66             : // |-----------------------------------| <- sp
      67             : // |                                   | Lower address
      68             : //
      69             : //
       70             : // To access data in a frame, a constant offset to it must be computable at
       71             : // compile time from one of the pointers (fp, bp, sp). The sizes of the
       72             : // areas with a dotted background cannot be computed at compile time if
       73             : // those areas are present, so all three of fp, bp and sp must be set up in
       74             : // order to be able to access all contents of the frame, assuming all of
       75             : // the frame areas are non-empty.
      76             : //
      77             : // For most functions, some of the frame areas are empty. For those functions,
      78             : // it may not be necessary to set up fp or bp:
      79             : // * A base pointer is definitely needed when there are both VLAs and local
      80             : //   variables with more-than-default alignment requirements.
      81             : // * A frame pointer is definitely needed when there are local variables with
      82             : //   more-than-default alignment requirements.
      83             : //
      84             : // In some cases when a base pointer is not strictly needed, it is generated
      85             : // anyway when offsets from the frame pointer to access local variables become
      86             : // so large that the offset can't be encoded in the immediate fields of loads
      87             : // or stores.
      88             : //
      89             : // FIXME: also explain the redzone concept.
      90             : // FIXME: also explain the concept of reserved call frames.
      91             : //
      92             : //===----------------------------------------------------------------------===//
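                      : // Illustrative example (editor's note, not from the original source): with
                      : // fp (x29) set up, a fixed-size local at a known 24-byte offset below the
                      : // frame record can be reached with a constant offset, e.g.
                      : //   ldur x0, [x29, #-24]
                      : // whereas an object that sits below a variable-sized area has no fp-relative
                      : // offset known at compile time and must instead be addressed relative to
                      : // sp or the base pointer (x19).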
      93             : 
      94             : #include "AArch64FrameLowering.h"
      95             : #include "AArch64InstrInfo.h"
      96             : #include "AArch64MachineFunctionInfo.h"
      97             : #include "AArch64RegisterInfo.h"
      98             : #include "AArch64Subtarget.h"
      99             : #include "AArch64TargetMachine.h"
     100             : #include "MCTargetDesc/AArch64AddressingModes.h"
     101             : #include "llvm/ADT/SmallVector.h"
     102             : #include "llvm/ADT/Statistic.h"
     103             : #include "llvm/CodeGen/LivePhysRegs.h"
     104             : #include "llvm/CodeGen/MachineBasicBlock.h"
     105             : #include "llvm/CodeGen/MachineFrameInfo.h"
     106             : #include "llvm/CodeGen/MachineFunction.h"
     107             : #include "llvm/CodeGen/MachineInstr.h"
     108             : #include "llvm/CodeGen/MachineInstrBuilder.h"
     109             : #include "llvm/CodeGen/MachineMemOperand.h"
     110             : #include "llvm/CodeGen/MachineModuleInfo.h"
     111             : #include "llvm/CodeGen/MachineOperand.h"
     112             : #include "llvm/CodeGen/MachineRegisterInfo.h"
     113             : #include "llvm/CodeGen/RegisterScavenging.h"
     114             : #include "llvm/CodeGen/TargetInstrInfo.h"
     115             : #include "llvm/CodeGen/TargetRegisterInfo.h"
     116             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
     117             : #include "llvm/IR/Attributes.h"
     118             : #include "llvm/IR/CallingConv.h"
     119             : #include "llvm/IR/DataLayout.h"
     120             : #include "llvm/IR/DebugLoc.h"
     121             : #include "llvm/IR/Function.h"
     122             : #include "llvm/MC/MCDwarf.h"
     123             : #include "llvm/Support/CommandLine.h"
     124             : #include "llvm/Support/Debug.h"
     125             : #include "llvm/Support/ErrorHandling.h"
     126             : #include "llvm/Support/MathExtras.h"
     127             : #include "llvm/Support/raw_ostream.h"
     128             : #include "llvm/Target/TargetMachine.h"
     129             : #include "llvm/Target/TargetOptions.h"
     130             : #include <cassert>
     131             : #include <cstdint>
     132             : #include <iterator>
     133             : #include <vector>
     134             : 
     135             : using namespace llvm;
     136             : 
     137             : #define DEBUG_TYPE "frame-info"
     138             : 
     139       81686 : static cl::opt<bool> EnableRedZone("aarch64-redzone",
     140       81686 :                                    cl::desc("enable use of redzone on AArch64"),
     141      245058 :                                    cl::init(false), cl::Hidden);
     142             : 
     143             : STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
     144             : 
      145             : /// This is the biggest offset to the stack pointer we can encode in AArch64
      146             : /// instructions (without using a separate calculation and a temp register).
      147             : /// Note that the exceptions here are vector stores/loads, which cannot encode
      148             : /// any displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
     149             : static const unsigned DefaultSafeSPDisplacement = 255;
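                      : // (Editor's note: 255 is the largest positive offset of the signed 9-bit,
                      : // unscaled LDUR/STUR addressing mode, which covers -256..+255 bytes.)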
     150             : 
     151             : /// Look at each instruction that references stack frames and return the stack
     152             : /// size limit beyond which some of these instructions will require a scratch
     153             : /// register during their expansion later.
     154       13524 : static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
      155             :   // FIXME: For now, just conservatively guesstimate based on the unscaled
      156             :   // indexing range. We'll often end up allocating an unnecessary spill slot,
      157             :   // but realistically that's not a big deal at this stage of the game.
     158       29853 :   for (MachineBasicBlock &MBB : MF) {
     159      100120 :     for (MachineInstr &MI : MBB) {
     160      135526 :       if (MI.isDebugValue() || MI.isPseudo() ||
     161      188046 :           MI.getOpcode() == AArch64::ADDXri ||
     162             :           MI.getOpcode() == AArch64::ADDSXri)
     163        7501 :         continue;
     164             : 
     165      386548 :       for (const MachineOperand &MO : MI.operands()) {
     166      163310 :         if (!MO.isFI())
     167      160321 :           continue;
     168             : 
     169        2989 :         int Offset = 0;
     170        2989 :         if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
     171             :             AArch64FrameOffsetCannotUpdate)
     172          11 :           return 0;
     173             :       }
     174             :     }
     175             :   }
     176             :   return DefaultSafeSPDisplacement;
     177             : }
     178             : 
     179       16948 : bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
     180       16948 :   if (!EnableRedZone)
     181             :     return false;
     182             :   // Don't use the red zone if the function explicitly asks us not to.
     183             :   // This is typically used for kernel code.
     184         142 :   if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
     185             :     return false;
     186             : 
     187          71 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     188             :   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     189          71 :   unsigned NumBytes = AFI->getLocalStackSize();
     190             : 
     191          71 :   return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
     192             : }
     193             : 
     194             : /// hasFP - Return true if the specified function should have a dedicated frame
     195             : /// pointer register.
     196       99194 : bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
     197       99194 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     198       99194 :   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
     199             :   // Retain behavior of always omitting the FP for leaf functions when possible.
     200       99194 :   if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
     201             :     return true;
     202      290638 :   if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
     203      386267 :       MFI.hasStackMap() || MFI.hasPatchPoint() ||
     204       95937 :       RegInfo->needsStackRealignment(MF))
     205             :     return true;
      206             :   // With large call frames around we may need to use FP to access the register
      207             :   // scavenging emergency spill slot.
     208             :   //
      209             :   // Unfortunately, some calls to hasFP() (e.g. machine verifier ->
      210             :   // getReservedReg() -> hasFP) happen in the middle of GlobalISel and are too
      211             :   // early to know the max call frame size. Hopefully conservatively returning
      212             :   // "true" in those cases is fine.
     213             :   // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
     214       95750 :   if (!MFI.isMaxCallFrameSizeComputed() ||
     215             :       MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
     216             :     return true;
     217             : 
     218             :   return false;
     219             : }
     220             : 
     221             : /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
      222             : /// not required, we reserve argument space for call sites immediately on
      223             : /// entry to the current function.  This eliminates the need for add/sub sp
      224             : /// brackets around call sites.  Returns true if the call frame is
     225             : /// included as part of the stack frame.
     226             : bool
     227       10504 : AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
     228       10504 :   return !MF.getFrameInfo().hasVarSizedObjects();
     229             : }
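                      : // Illustrative contrast (editor's note): without a reserved call frame, each
                      : // call site is bracketed by its own SP adjustment, e.g.
                      : //   sub sp, sp, #32
                      : //   bl  callee
                      : //   add sp, sp, #32
                      : // With a reserved call frame, the maximum outgoing-argument area is instead
                      : // folded into the prologue's single SP adjustment and the brackets disappear.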
     230             : 
     231        3578 : MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
     232             :     MachineFunction &MF, MachineBasicBlock &MBB,
     233             :     MachineBasicBlock::iterator I) const {
     234             :   const AArch64InstrInfo *TII =
     235        3578 :       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
     236             :   DebugLoc DL = I->getDebugLoc();
     237        3578 :   unsigned Opc = I->getOpcode();
     238        3578 :   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
     239        3578 :   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
     240             : 
     241        3578 :   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
     242        3578 :   if (!TFI->hasReservedCallFrame(MF)) {
     243         132 :     unsigned Align = getStackAlignment();
     244             : 
     245         132 :     int64_t Amount = I->getOperand(0).getImm();
     246         264 :     Amount = alignTo(Amount, Align);
     247         132 :     if (!IsDestroy)
     248          66 :       Amount = -Amount;
     249             : 
     250             :     // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
     251             :     // doesn't have to pop anything), then the first operand will be zero too so
     252             :     // this adjustment is a no-op.
     253         132 :     if (CalleePopAmount == 0) {
     254             :       // FIXME: in-function stack adjustment for calls is limited to 24-bits
     255             :       // because there's no guaranteed temporary register available.
     256             :       //
     257             :       // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      258             :       // 1) For offsets that fit in 12 bits, we use LSL #0.
      259             :       // 2) For offsets between 12 and 24 bits, we use two instructions: one
      260             :       // with LSL #0 and the other with LSL #12.
     261             :       //
     262             :       // Most call frames will be allocated at the start of a function so
     263             :       // this is OK, but it is a limitation that needs dealing with.
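                      :       // For example (editor's note, illustrative figures): an adjustment of
                      :       // 0x12345 bytes would be emitted as
                      :       //   sub sp, sp, #0x12, lsl #12   ; 0x12000
                      :       //   sub sp, sp, #0x345           ;   0x345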
     264             :       assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
     265         131 :       emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
     266             :     }
     267        3446 :   } else if (CalleePopAmount != 0) {
     268             :     // If the calling convention demands that the callee pops arguments from the
     269             :     // stack, we want to add it back if we have a reserved call frame.
     270             :     assert(CalleePopAmount < 0xffffff && "call frame too large");
     271          12 :     emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
     272             :                     TII);
     273             :   }
     274        7156 :   return MBB.erase(I);
     275             : }
     276             : 
     277         928 : void AArch64FrameLowering::emitCalleeSavedFrameMoves(
     278             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
     279         928 :   MachineFunction &MF = *MBB.getParent();
     280         928 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     281         928 :   const TargetSubtargetInfo &STI = MF.getSubtarget();
     282         928 :   const MCRegisterInfo *MRI = STI.getRegisterInfo();
     283         928 :   const TargetInstrInfo *TII = STI.getInstrInfo();
     284             :   DebugLoc DL = MBB.findDebugLoc(MBBI);
     285             : 
     286             :   // Add callee saved registers to move list.
     287             :   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
     288         928 :   if (CSI.empty())
     289             :     return;
     290             : 
     291        3075 :   for (const auto &Info : CSI) {
     292        2147 :     unsigned Reg = Info.getReg();
     293             :     int64_t Offset =
     294        4294 :         MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
     295        2147 :     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
     296             :     unsigned CFIIndex = MF.addFrameInst(
     297        4294 :         MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
     298        6441 :     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
     299             :         .addCFIIndex(CFIIndex)
     300             :         .setMIFlags(MachineInstr::FrameSetup);
     301             :   }
     302             : }
     303             : 
     304             : // Find a scratch register that we can use at the start of the prologue to
     305             : // re-align the stack pointer.  We avoid using callee-save registers since they
     306             : // may appear to be free when this is called from canUseAsPrologue (during
     307             : // shrink wrapping), but then no longer be free when this is called from
     308             : // emitPrologue.
     309             : //
     310             : // FIXME: This is a bit conservative, since in the above case we could use one
     311             : // of the callee-save registers as a scratch temp to re-align the stack pointer,
     312             : // but we would then have to make sure that we were in fact saving at least one
     313             : // callee-save register in the prologue, which is additional complexity that
     314             : // doesn't seem worth the benefit.
     315          26 : static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
     316          26 :   MachineFunction *MF = MBB->getParent();
     317             : 
     318             :   // If MBB is an entry block, use X9 as the scratch register
     319          26 :   if (&MF->front() == MBB)
     320             :     return AArch64::X9;
     321             : 
     322           9 :   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
     323             :   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
     324           9 :   LivePhysRegs LiveRegs(TRI);
     325           9 :   LiveRegs.addLiveIns(*MBB);
     326             : 
     327             :   // Mark callee saved registers as used so we will not choose them.
     328           9 :   const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
     329         369 :   for (unsigned i = 0; CSRegs[i]; ++i)
     330         180 :     LiveRegs.addReg(CSRegs[i]);
     331             : 
     332             :   // Prefer X9 since it was historically used for the prologue scratch reg.
     333           9 :   const MachineRegisterInfo &MRI = MF->getRegInfo();
     334           9 :   if (LiveRegs.available(MRI, AArch64::X9))
     335             :     return AArch64::X9;
     336             : 
     337          74 :   for (unsigned Reg : AArch64::GPR64RegClass) {
     338          36 :     if (LiveRegs.available(MRI, Reg))
     339             :       return Reg;
     340             :   }
     341             :   return AArch64::NoRegister;
     342             : }
     343             : 
     344          76 : bool AArch64FrameLowering::canUseAsPrologue(
     345             :     const MachineBasicBlock &MBB) const {
     346          76 :   const MachineFunction *MF = MBB.getParent();
     347             :   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
     348          76 :   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
     349             :   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     350             : 
     351             :   // Don't need a scratch register if we're not going to re-align the stack.
     352          76 :   if (!RegInfo->needsStackRealignment(*MF))
     353             :     return true;
     354             :   // Otherwise, we can use any block as long as it has a scratch register
     355             :   // available.
     356           7 :   return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
     357             : }
     358             : 
     359       27755 : static bool windowsRequiresStackProbe(MachineFunction &MF,
     360             :                                       unsigned StackSizeInBytes) {
     361       27755 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     362       27755 :   if (!Subtarget.isTargetWindows())
     363             :     return false;
     364         107 :   const Function &F = MF.getFunction();
     365             :   // TODO: When implementing stack protectors, take that into account
     366             :   // for the probe threshold.
     367         107 :   unsigned StackProbeSize = 4096;
     368         107 :   if (F.hasFnAttribute("stack-probe-size"))
     369           0 :     F.getFnAttribute("stack-probe-size")
     370           0 :         .getValueAsString()
     371           0 :         .getAsInteger(0, StackProbeSize);
     372         107 :   return StackSizeInBytes >= StackProbeSize;
     373             : }
     374             : 
     375       14764 : bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
     376             :     MachineFunction &MF, unsigned StackBumpBytes) const {
     377             :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     378       14764 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     379       14764 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     380             :   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     381             : 
     382       14764 :   if (AFI->getLocalStackSize() == 0)
     383             :     return false;
     384             : 
     385             :   // 512 is the maximum immediate for stp/ldp that will be used for
     386             :   // callee-save save/restores
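                      :   // (Editor's note: the 64-bit STP/LDP immediate is a signed 7-bit value
                      :   // scaled by 8, giving byte offsets in [-512, 504], so bumps below 512
                      :   // remain encodable.)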
     387        1220 :   if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
     388             :     return false;
     389             : 
     390        1177 :   if (MFI.hasVarSizedObjects())
     391             :     return false;
     392             : 
     393        1143 :   if (RegInfo->needsStackRealignment(MF))
     394             :     return false;
     395             : 
     396             :   // This isn't strictly necessary, but it simplifies things a bit since the
     397             :   // current RedZone handling code assumes the SP is adjusted by the
     398             :   // callee-save save/restore code.
     399        1121 :   if (canUseRedZone(MF))
     400             :     return false;
     401             : 
     402        1117 :   return true;
     403             : }
     404             : 
      405             : // Convert a callee-save register save/restore instruction so that it also
      406             : // decrements/increments the stack pointer to allocate/deallocate the callee-save
      407             : // stack area, by switching the store/load to its pre/post-increment form.
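                      : // For example (editor's sketch, assuming a 16-byte callee-save area):
                      : //   stp x29, x30, [sp]          ; STPXi, offset 0
                      : // becomes
                      : //   stp x29, x30, [sp, #-16]!   ; STPXpre, pre-decrementing SP by 16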
     408        1498 : static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
     409             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     410             :     const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
     411             :   unsigned NewOpc;
     412             :   bool NewIsUnscaled = false;
     413        2996 :   switch (MBBI->getOpcode()) {
     414           0 :   default:
     415           0 :     llvm_unreachable("Unexpected callee-save save/restore opcode!");
     416             :   case AArch64::STPXi:
     417             :     NewOpc = AArch64::STPXpre;
     418             :     break;
     419          36 :   case AArch64::STPDi:
     420             :     NewOpc = AArch64::STPDpre;
     421          36 :     break;
     422         274 :   case AArch64::STRXui:
     423             :     NewOpc = AArch64::STRXpre;
     424             :     NewIsUnscaled = true;
     425         274 :     break;
     426          36 :   case AArch64::STRDui:
     427             :     NewOpc = AArch64::STRDpre;
     428             :     NewIsUnscaled = true;
     429          36 :     break;
     430         394 :   case AArch64::LDPXi:
     431             :     NewOpc = AArch64::LDPXpost;
     432         394 :     break;
     433          36 :   case AArch64::LDPDi:
     434             :     NewOpc = AArch64::LDPDpost;
     435          36 :     break;
     436         287 :   case AArch64::LDRXui:
     437             :     NewOpc = AArch64::LDRXpost;
     438             :     NewIsUnscaled = true;
     439         287 :     break;
     440          36 :   case AArch64::LDRDui:
     441             :     NewOpc = AArch64::LDRDpost;
     442             :     NewIsUnscaled = true;
     443          36 :     break;
     444             :   }
     445             : 
     446        2996 :   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
     447        1498 :   MIB.addReg(AArch64::SP, RegState::Define);
     448             : 
     449             :   // Copy all operands other than the immediate offset.
     450             :   unsigned OpndIdx = 0;
     451        5359 :   for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
     452             :        ++OpndIdx)
     453        3861 :     MIB.add(MBBI->getOperand(OpndIdx));
     454             : 
     455             :   assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
     456             :          "Unexpected immediate offset in first/last callee-save save/restore "
     457             :          "instruction!");
     458             :   assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
     459             :          "Unexpected base register in callee-save save/restore instruction!");
     460             :   // Last operand is immediate offset that needs fixing.
     461             :   assert(CSStackSizeInc % 8 == 0);
     462        1498 :   int64_t CSStackSizeIncImm = CSStackSizeInc;
     463        1498 :   if (!NewIsUnscaled)
     464         865 :     CSStackSizeIncImm /= 8;
     465             :   MIB.addImm(CSStackSizeIncImm);
     466             : 
     467        1498 :   MIB.setMIFlags(MBBI->getFlags());
     468        1498 :   MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
     469             : 
     470        2996 :   return std::prev(MBB.erase(MBBI));
     471             : }
     472             : 
      473             : // Fix up callee-save register save/restore instructions to take a combined SP
      474             : // bump into account by adding the local stack size to their stack offsets.
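                      : // For example (editor's sketch): with a local stack size of 32 bytes, a
                      : // callee-save store emitted as
                      : //   stp x29, x30, [sp, #16]
                      : // is rewritten to
                      : //   stp x29, x30, [sp, #48]
                      : // (the operand itself is scaled by 8, so its immediate grows by 32 / 8 = 4).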
     475        1568 : static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
     476             :                                               unsigned LocalStackSize) {
     477             :   unsigned Opc = MI.getOpcode();
     478             :   (void)Opc;
     479             :   assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
     480             :           Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
     481             :           Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
     482             :           Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
     483             :          "Unexpected callee-save save/restore opcode!");
     484             : 
     485        1568 :   unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
     486             :   assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
     487             :          "Unexpected base register in callee-save save/restore instruction!");
     488             :   // Last operand is immediate offset that needs fixing.
     489        1568 :   MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
     490             :   // All generated opcodes have scaled offsets.
     491             :   assert(LocalStackSize % 8 == 0);
     492        1568 :   OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
     493        1568 : }
     494             : 
     495       13337 : void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     496             :                                         MachineBasicBlock &MBB) const {
     497       13337 :   MachineBasicBlock::iterator MBBI = MBB.begin();
     498       13337 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     499       13337 :   const Function &F = MF.getFunction();
     500       13337 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     501             :   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     502             :   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
     503       13337 :   MachineModuleInfo &MMI = MF.getMMI();
     504             :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     505       13337 :   bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
     506       13337 :   bool HasFP = hasFP(MF);
     507             : 
     508             :   // Debug location must be unknown since the first debug location is used
     509             :   // to determine the end of the prologue.
     510       13337 :   DebugLoc DL;
     511             : 
     512             :   // All calls are tail calls in GHC calling conv, and functions have no
     513             :   // prologue/epilogue.
     514       26674 :   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     515             :     return;
     516             : 
     517       13333 :   int NumBytes = (int)MFI.getStackSize();
     518       13333 :   if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
     519             :     assert(!HasFP && "unexpected function without stack frame but with FP");
     520             : 
     521             :     // All of the stack allocation is for locals.
     522             :     AFI->setLocalStackSize(NumBytes);
     523             : 
     524       12140 :     if (!NumBytes)
     525             :       return;
     526             :     // REDZONE: If the stack size is less than 128 bytes, we don't need
     527             :     // to actually allocate.
     528         212 :     if (canUseRedZone(MF))
     529             :       ++NumRedZoneFunctions;
     530             :     else {
     531         210 :       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
     532             :                       MachineInstr::FrameSetup);
     533             : 
     534             :       // Label used to tie together the PROLOG_LABEL and the MachineMoves.
     535         210 :       MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
     536             :       // Encode the stack size of the leaf function.
     537             :       unsigned CFIIndex = MF.addFrameInst(
     538         210 :           MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
     539         630 :       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
     540             :           .addCFIIndex(CFIIndex)
     541             :           .setMIFlags(MachineInstr::FrameSetup);
     542             :     }
     543             :     return;
     544             :   }
     545             : 
     546             :   bool IsWin64 =
     547        1193 :       Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
     548        1147 :   unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
     549             : 
     550        1193 :   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
     551             :   // All of the remaining stack allocations are for locals.
     552        1193 :   AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
     553             : 
     554        1193 :   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
     555        1193 :   if (CombineSPBump) {
     556         448 :     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
     557             :                     MachineInstr::FrameSetup);
     558             :     NumBytes = 0;
     559         745 :   } else if (PrologueSaveSize != 0) {
     560         745 :     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
     561         745 :                                                      -PrologueSaveSize);
     562         745 :     NumBytes -= PrologueSaveSize;
     563             :   }
     564             :   assert(NumBytes >= 0 && "Negative stack allocation size!?");
     565             : 
     566             :   // Move past the saves of the callee-saved registers, fixing up the offsets
     567             :   // and pre-inc if we decided to combine the callee-save and local stack
     568             :   // pointer bump above.
     569             :   MachineBasicBlock::iterator End = MBB.end();
     570        3105 :   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
     571        1912 :     if (CombineSPBump)
     572         779 :       fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
     573             :     ++MBBI;
     574             :   }
     575        1193 :   if (HasFP) {
     576             :     // Only set up FP if we actually need to. Frame pointer is fp =
     577             :     // sp - fixedobject - 16.
     578         279 :     int FPOffset = AFI->getCalleeSavedStackSize() - 16;
     579         279 :     if (CombineSPBump)
     580         102 :       FPOffset += AFI->getLocalStackSize();
     581             : 
     582             :     // Issue    sub fp, sp, FPOffset or
     583             :     //          mov fp,sp          when FPOffset is zero.
     584             :     // Note: All stores of callee-saved registers are marked as "FrameSetup".
     585             :     // This code marks the instruction(s) that set the FP also.
     586         279 :     emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
     587             :                     MachineInstr::FrameSetup);
     588             :   }
     589             : 
     590        1193 :   if (windowsRequiresStackProbe(MF, NumBytes)) {
     591           2 :     uint32_t NumWords = NumBytes >> 4;
     592             : 
     593           6 :     BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
     594           2 :         .addImm(NumWords)
     595             :         .setMIFlags(MachineInstr::FrameSetup);
     596             : 
     597           2 :     switch (MF.getTarget().getCodeModel()) {
     598           1 :     case CodeModel::Small:
     599             :     case CodeModel::Medium:
     600             :     case CodeModel::Kernel:
     601           3 :       BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
     602             :           .addExternalSymbol("__chkstk")
     603           1 :           .addReg(AArch64::X15, RegState::Implicit)
     604             :           .setMIFlags(MachineInstr::FrameSetup);
     605           1 :       break;
     606           1 :     case CodeModel::Large:
     607           3 :       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
     608           1 :           .addReg(AArch64::X16, RegState::Define)
     609             :           .addExternalSymbol("__chkstk")
     610             :           .addExternalSymbol("__chkstk")
     611             :           .setMIFlags(MachineInstr::FrameSetup);
     612             : 
     613           3 :       BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
     614           1 :           .addReg(AArch64::X16, RegState::Kill)
     615           1 :           .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
     616             :           .setMIFlags(MachineInstr::FrameSetup);
     617           1 :       break;
     618             :     }
     619             : 
     620           6 :     BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
     621           2 :         .addReg(AArch64::SP, RegState::Kill)
     622           2 :         .addReg(AArch64::X15, RegState::Kill)
     623           2 :         .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
     624             :         .setMIFlags(MachineInstr::FrameSetup);
     625             :     NumBytes = 0;
     626             :   }
     627             : 
     628             :   // Allocate space for the rest of the frame.
     629        1191 :   if (NumBytes) {
     630          49 :     const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
     631             :     unsigned scratchSPReg = AArch64::SP;
     632             : 
     633          49 :     if (NeedsRealignment) {
     634          19 :       scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
     635             :       assert(scratchSPReg != AArch64::NoRegister);
     636             :     }
     637             : 
     638             :     // If we're a leaf function, try using the red zone.
     639          49 :     if (!canUseRedZone(MF))
     640             :       // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
     641             :       // the correct value here, as NumBytes also includes padding bytes,
     642             :       // which shouldn't be counted here.
     643          48 :       emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
     644             :                       MachineInstr::FrameSetup);
     645             : 
     646          49 :     if (NeedsRealignment) {
     647          19 :       const unsigned Alignment = MFI.getMaxAlignment();
     648          19 :       const unsigned NrBitsToZero = countTrailingZeros(Alignment);
     649             :       assert(NrBitsToZero > 1);
     650             :       assert(scratchSPReg != AArch64::SP);
     651             : 
     652             :       // SUB X9, SP, NumBytes
      653             :       //   -- X9 is a temporary register, so it shouldn't contain any live data
      654             :       //   -- here; it is free to use. This is already produced by emitFrameOffset above.
     655             :       // AND SP, X9, 0b11111...0000
     656             :       // The logical immediates have a non-trivial encoding. The following
     657             :       // formula computes the encoded immediate with all ones but
     658             :       // NrBitsToZero zero bits as least significant bits.
     659          19 :       uint32_t andMaskEncoded = (1 << 12)                         // = N
     660          19 :                                 | ((64 - NrBitsToZero) << 6)      // immr
     661          19 :                                 | ((64 - NrBitsToZero - 1) << 0); // imms
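                      :       // Worked example (editor's note, illustrative): for a 32-byte alignment,
                      :       // NrBitsToZero == 5, so andMaskEncoded == (1 << 12) | (59 << 6) | 58
                      :       // == 0x1EFA, which decodes to the 64-bit mask 0xFFFFFFFFFFFFFFE0,
                      :       // i.e. all ones with the low 5 bits cleared.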
     662             : 
     663          57 :       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
     664          19 :           .addReg(scratchSPReg, RegState::Kill)
     665          19 :           .addImm(andMaskEncoded);
     666             :       AFI->setStackRealigned(true);
     667             :     }
     668             :   }
     669             : 
     670             :   // If we need a base pointer, set it up here. It's whatever the value of the
     671             :   // stack pointer is at this point. Any variable size objects will be allocated
     672             :   // after this, so we can still use the base pointer to reference locals.
     673             :   //
     674             :   // FIXME: Clarify FrameSetup flags here.
     675             :   // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
     676             :   // needed.
     677        1193 :   if (RegInfo->hasBasePointer(MF)) {
     678          14 :     TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
     679          14 :                      false);
     680             :   }
     681             : 
     682        1193 :   if (needsFrameMoves) {
     683         928 :     const DataLayout &TD = MF.getDataLayout();
     684         928 :     const int StackGrowth = -TD.getPointerSize(0);
     685         928 :     unsigned FramePtr = RegInfo->getFrameRegister(MF);
     686             :     // An example of the prologue:
     687             :     //
     688             :     //     .globl __foo
     689             :     //     .align 2
     690             :     //  __foo:
     691             :     // Ltmp0:
     692             :     //     .cfi_startproc
     693             :     //     .cfi_personality 155, ___gxx_personality_v0
     694             :     // Leh_func_begin:
     695             :     //     .cfi_lsda 16, Lexception33
     696             :     //
     697             :     //     stp  xa,bx, [sp, -#offset]!
     698             :     //     ...
     699             :     //     stp  x28, x27, [sp, #offset-32]
     700             :     //     stp  fp, lr, [sp, #offset-16]
     701             :     //     add  fp, sp, #offset - 16
     702             :     //     sub  sp, sp, #1360
     703             :     //
     704             :     // The Stack:
     705             :     //       +-------------------------------------------+
     706             :     // 10000 | ........ | ........ | ........ | ........ |
     707             :     // 10004 | ........ | ........ | ........ | ........ |
     708             :     //       +-------------------------------------------+
     709             :     // 10008 | ........ | ........ | ........ | ........ |
     710             :     // 1000c | ........ | ........ | ........ | ........ |
     711             :     //       +===========================================+
     712             :     // 10010 |                X28 Register               |
     713             :     // 10014 |                X28 Register               |
     714             :     //       +-------------------------------------------+
     715             :     // 10018 |                X27 Register               |
     716             :     // 1001c |                X27 Register               |
     717             :     //       +===========================================+
     718             :     // 10020 |                Frame Pointer              |
     719             :     // 10024 |                Frame Pointer              |
     720             :     //       +-------------------------------------------+
     721             :     // 10028 |                Link Register              |
     722             :     // 1002c |                Link Register              |
     723             :     //       +===========================================+
     724             :     // 10030 | ........ | ........ | ........ | ........ |
     725             :     // 10034 | ........ | ........ | ........ | ........ |
     726             :     //       +-------------------------------------------+
     727             :     // 10038 | ........ | ........ | ........ | ........ |
     728             :     // 1003c | ........ | ........ | ........ | ........ |
     729             :     //       +-------------------------------------------+
     730             :     //
     731             :     //     [sp] = 10030        ::    >>initial value<<
     732             :     //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
     733             :     //     fp = sp == 10020    ::  mov fp, sp
     734             :     //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
     735             :     //     sp == 10010         ::    >>final value<<
     736             :     //
     737             :     // The frame pointer (w29) points to address 10020. If we use an offset of
     738             :     // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
     739             :     // for w27, and -32 for w28:
     740             :     //
     741             :     //  Ltmp1:
     742             :     //     .cfi_def_cfa w29, 16
     743             :     //  Ltmp2:
     744             :     //     .cfi_offset w30, -8
     745             :     //  Ltmp3:
     746             :     //     .cfi_offset w29, -16
     747             :     //  Ltmp4:
     748             :     //     .cfi_offset w27, -24
     749             :     //  Ltmp5:
     750             :     //     .cfi_offset w28, -32
     751             : 
     752         928 :     if (HasFP) {
     753             :       // Define the current CFA rule to use the provided FP.
     754         173 :       unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
     755         346 :       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
     756         173 :           nullptr, Reg, 2 * StackGrowth - FixedObject));
     757         519 :       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
     758             :           .addCFIIndex(CFIIndex)
     759             :           .setMIFlags(MachineInstr::FrameSetup);
     760             :     } else {
     761             :       // Encode the stack size of the leaf function.
     762             :       unsigned CFIIndex = MF.addFrameInst(
     763        1510 :           MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
     764        2265 :       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
     765             :           .addCFIIndex(CFIIndex)
     766             :           .setMIFlags(MachineInstr::FrameSetup);
     767             :     }
     768             : 
     769             :     // Now emit the moves for whatever callee saved regs we have (including FP,
     770             :     // LR if those are saved).
     771         928 :     emitCalleeSavedFrameMoves(MBB, MBBI);
     772             :   }
     773             : }
     774             : 
     775       13575 : void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     776             :                                         MachineBasicBlock &MBB) const {
     777       13575 :   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
     778       13575 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     779       13575 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     780             :   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
     781       13575 :   DebugLoc DL;
     782             :   bool IsTailCallReturn = false;
     783       13575 :   if (MBB.end() != MBBI) {
     784             :     DL = MBBI->getDebugLoc();
     785       13575 :     unsigned RetOpcode = MBBI->getOpcode();
     786       13575 :     IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
     787             :       RetOpcode == AArch64::TCRETURNri;
     788             :   }
     789       13575 :   int NumBytes = MFI.getStackSize();
     790             :   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     791             : 
     792             :   // All calls are tail calls in GHC calling conv, and functions have no
     793             :   // prologue/epilogue.
     794       27150 :   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     795             :     return;
     796             : 
     797             :   // Initial and residual are named for consistency with the prologue. Note that
     798             :   // in the epilogue, the residual adjustment is executed first.
     799             :   uint64_t ArgumentPopSize = 0;
     800       13571 :   if (IsTailCallReturn) {
     801         192 :     MachineOperand &StackAdjust = MBBI->getOperand(1);
     802             : 
      803             :     // For a tail call in a callee-pops-arguments environment, some or all of
      804             :     // the stack may actually be in use for the call's arguments; this is
      805             :     // calculated during LowerCall and consumed here...
     806         192 :     ArgumentPopSize = StackAdjust.getImm();
     807             :   } else {
     808             :     // ... otherwise the amount to pop is *all* of the argument space,
     809             :     // conveniently stored in the MachineFunctionInfo by
     810             :     // LowerFormalArguments. This will, of course, be zero for the C calling
     811             :     // convention.
     812       13379 :     ArgumentPopSize = AFI->getArgumentStackToRestore();
     813             :   }
     814             : 
     815             :   // The stack frame should be like below,
     816             :   //
     817             :   //      ----------------------                     ---
     818             :   //      |                    |                      |
     819             :   //      | BytesInStackArgArea|              CalleeArgStackSize
     820             :   //      | (NumReusableBytes) |                (of tail call)
     821             :   //      |                    |                     ---
     822             :   //      |                    |                      |
     823             :   //      ---------------------|        ---           |
     824             :   //      |                    |         |            |
     825             :   //      |   CalleeSavedReg   |         |            |
     826             :   //      | (CalleeSavedStackSize)|      |            |
     827             :   //      |                    |         |            |
     828             :   //      ---------------------|         |         NumBytes
     829             :   //      |                    |     StackSize  (StackAdjustUp)
     830             :   //      |   LocalStackSize   |         |            |
     831             :   //      | (covering callee   |         |            |
     832             :   //      |       args)        |         |            |
     833             :   //      |                    |         |            |
     834             :   //      ----------------------        ---          ---
     835             :   //
     836             :   // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
     837             :   //             = StackSize + ArgumentPopSize
     838             :   //
     839             :   // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
     840             :   // it as the 2nd argument of AArch64ISD::TC_RETURN.
     841             : 
     842             :   bool IsWin64 =
     843             :       Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
     844       13528 :   unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
     845             : 
     846       13571 :   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
     847       13571 :   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
     848             : 
     849       13571 :   if (!CombineSPBump && PrologueSaveSize != 0)
     850         753 :     convertCalleeSaveRestoreToSPPrePostIncDec(
     851         753 :         MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);
     852             : 
     853             :   // Move past the restores of the callee-saved registers.
     854       13571 :   MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
     855             :   MachineBasicBlock::iterator Begin = MBB.begin();
     856       15501 :   while (LastPopI != Begin) {
     857             :     --LastPopI;
     858       15012 :     if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
     859             :       ++LastPopI;
     860             :       break;
     861        1930 :     } else if (CombineSPBump)
     862         789 :       fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
     863             :   }
     864             : 
     865             :   // If there is a single SP update, insert it before the ret and we're done.
     866       13571 :   if (CombineSPBump) {
     867         669 :     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
     868         669 :                     NumBytes + ArgumentPopSize, TII,
     869             :                     MachineInstr::FrameDestroy);
     870         669 :     return;
     871             :   }
     872             : 
     873       12902 :   NumBytes -= PrologueSaveSize;
     874             :   assert(NumBytes >= 0 && "Negative stack allocation size!?");
     875             : 
     876       12902 :   if (!hasFP(MF)) {
     877       12727 :     bool RedZone = canUseRedZone(MF);
     878             :     // If this was a redzone leaf function, we don't need to restore the
     879             :     // stack pointer (but we may need to pop stack args for fastcc).
     880       12727 :     if (RedZone && ArgumentPopSize == 0)
     881             :       return;
     882             : 
     883       12675 :     bool NoCalleeSaveRestore = PrologueSaveSize == 0;
     884       12675 :     int StackRestoreBytes = RedZone ? 0 : NumBytes;
     885       12675 :     if (NoCalleeSaveRestore)
     886       12097 :       StackRestoreBytes += ArgumentPopSize;
     887       12675 :     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
     888             :                     StackRestoreBytes, TII, MachineInstr::FrameDestroy);
     889             :     // If we were able to combine the local stack pop with the argument pop,
     890             :     // then we're done.
     891       12675 :     if (NoCalleeSaveRestore || ArgumentPopSize == 0)
     892             :       return;
     893             :     NumBytes = 0;
     894             :   }
     895             : 
     896             :   // Restore the original stack pointer.
     897             :   // FIXME: Rather than doing the math here, we should instead just use
     898             :   // non-post-indexed loads for the restores if we aren't actually going to
     899             :   // be able to save any instructions.
     900         178 :   if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
     901          50 :     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
     902          50 :                     -AFI->getCalleeSavedStackSize() + 16, TII,
     903             :                     MachineInstr::FrameDestroy);
     904         128 :   else if (NumBytes)
     905           6 :     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
     906             :                     MachineInstr::FrameDestroy);
     907             : 
     908             :   // This must be placed after the callee-save restore code because that code
     909             :   // assumes the SP is at the same location as it was after the callee-save
     910             :   // spill code in the prologue.
     911         178 :   if (ArgumentPopSize)
     912           3 :     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
     913             :                     ArgumentPopSize, TII, MachineInstr::FrameDestroy);
     914             : }
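// For orientation only, a plausible shape of the code this epilogue path emits
// when the SP bump cannot be combined (registers and immediates are assumed
// for illustration and mirror the ldp example in restoreCalleeSavedRegisters()):
//
//   add   sp, sp, #local_size        // deallocate locals (emitFrameOffset)
//   ldp   x29, x30, [sp, #32]        // callee-save restores, FrameDestroy
//   ldp   x20, x19, [sp, #16]
//   ldp   x22, x21, [sp], #48        // last restore rewritten to post-increment
//   ret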
     915             : 
     916             : /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
     917             : /// debug info.  It's the same as what we use for resolving the code-gen
     918             : /// references for now.  FIXME: This can go wrong when references are
     919             : /// SP-relative and simple call frames aren't used.
     920          11 : int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
     921             :                                                  int FI,
     922             :                                                  unsigned &FrameReg) const {
     923          11 :   return resolveFrameIndexReference(MF, FI, FrameReg);
     924             : }
     925             : 
     926        3211 : int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
     927             :                                                      int FI, unsigned &FrameReg,
     928             :                                                      bool PreferFP) const {
     929        3211 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     930             :   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
     931        3211 :       MF.getSubtarget().getRegisterInfo());
     932             :   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     933        3211 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     934             :   bool IsWin64 =
     935        3211 :       Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
     936        2858 :   unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
     937        3211 :   int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
     938        3211 :   int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
     939             :   bool isFixed = MFI.isFixedObjectIndex(FI);
     940             : 
     941             :   // Use frame pointer to reference fixed objects. Use it for locals if
     942             :   // there are VLAs or a dynamically realigned SP (and thus the SP isn't
     943             :   // reliable as a base). Make sure useFPForScavengingIndex() does the
     944             :   // right thing for the emergency spill slot.
     945             :   bool UseFP = false;
     946        3211 :   if (AFI->hasStackFrame()) {
     947             :     // Note: Keeping the following as multiple 'if' statements rather than
     948             :     // merging to a single expression for readability.
     949             :     //
     950             :     // Argument access should always use the FP.
     951        2388 :     if (isFixed) {
     952         194 :       UseFP = hasFP(MF);
     953        2757 :     } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
     954         563 :                !RegInfo->needsStackRealignment(MF)) {
     955             :       // Use SP or FP, whichever gives us the best chance of the offset
     956             :       // being in range for direct access. If the FPOffset is positive,
     957             :       // that'll always be best, as the SP will be even further away.
     958             :       // If the FPOffset is negative, we have to keep in mind that the
     959             :       // available offset range for negative offsets is smaller than for
     960             :       // positive ones. If we have variable sized objects, we're stuck with
     961             :       // using the FP regardless, though, as the SP offset is unknown
     962             :       // and we don't have a base pointer available. If an offset is
     963             :       // available via the FP and the SP, use whichever is closest.
     964         489 :       if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
     965         406 :           (FPOffset >= -256 && Offset > -FPOffset))
     966             :         UseFP = true;
     967             :     }
     968             :   }
     969             : 
     970             :   assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
     971             :          "In the presence of dynamic stack pointer realignment, "
     972             :          "non-argument objects cannot be accessed through the frame pointer");
     973             : 
     974         194 :   if (UseFP) {
     975         357 :     FrameReg = RegInfo->getFrameRegister(MF);
     976         357 :     return FPOffset;
     977             :   }
     978             : 
     979             :   // Use the base pointer if we have one.
     980        2854 :   if (RegInfo->hasBasePointer(MF))
     981          15 :     FrameReg = RegInfo->getBaseRegister();
     982             :   else {
     983        2839 :     FrameReg = AArch64::SP;
     984             :     // If we're using the red zone for this function, the SP won't actually
     985             :     // be adjusted, so the offsets will be negative. They're also all
     986             :     // within range of the signed 9-bit immediate instructions.
     987        2839 :     if (canUseRedZone(MF))
     988           3 :       Offset -= AFI->getLocalStackSize();
     989             :   }
     990             : 
     991             :   return Offset;
     992             : }
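// A minimal standalone sketch of the FP-vs-SP preference applied above, for
// experimenting with the heuristic in isolation. The function name and
// parameter list are assumptions; the bounds mirror the comparison in
// resolveFrameIndexReference(): a non-negative FP offset always prefers FP,
// and a small negative one prefers FP only when FP is still the closer base
// (the -256 bound corresponds to the signed 9-bit unscaled immediate range).

static bool preferFPForAccess(int FPOffset, int SPOffset, bool PreferFP,
                              bool HasVarSizedObjects) {
  if (PreferFP || HasVarSizedObjects || FPOffset >= 0)
    return true;
  return FPOffset >= -256 && SPOffset > -FPOffset;
}

// e.g. preferFPForAccess(-40, 120, false, false) is true (FP is closer),
// while preferFPForAccess(-300, 16, false, false) is false.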
     993             : 
     994             : static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
     995             :   // Do not set a kill flag on values that are also marked as live-in. This
     996             :   // happens with the @llvm.returnaddress intrinsic and with arguments passed in
     997             :   // callee-saved registers.
     998             :   // Omitting the kill flags is conservatively correct even if the live-in
     999             :   // is not used after all.
    1000        3189 :   bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
    1001             :   return getKillRegState(!IsLiveIn);
    1002             : }
    1003             : 
    1004      273915 : static bool produceCompactUnwindFrame(MachineFunction &MF) {
    1005      273915 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    1006      273915 :   AttributeList Attrs = MF.getFunction().getAttributes();
    1007      325973 :   return Subtarget.isTargetMachO() &&
    1008      104116 :          !(Subtarget.getTargetLowering()->supportSwiftError() &&
    1009      325973 :            Attrs.hasAttrSomewhere(Attribute::SwiftError));
    1010             : }
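// MachO's compact unwind encoding can only describe callee-saved registers
// that are stored as adjacent pairs, which is why the asserts in
// computeCalleeSaveRegisterPairs() and the "store the pair" logic in
// determineCalleeSaves() below key off this predicate.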
    1011             : 
    1012             : namespace {
    1013             : 
    1014             : struct RegPairInfo {
    1015             :   unsigned Reg1 = AArch64::NoRegister;
    1016             :   unsigned Reg2 = AArch64::NoRegister;
    1017             :   int FrameIdx;
    1018             :   int Offset;
    1019             :   bool IsGPR;
    1020             : 
    1021        3842 :   RegPairInfo() = default;
    1022             : 
    1023             :   bool isPaired() const { return Reg2 != AArch64::NoRegister; }
    1024             : };
    1025             : 
    1026             : } // end anonymous namespace
    1027             : 
    1028        2400 : static void computeCalleeSaveRegisterPairs(
    1029             :     MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    1030             :     const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
    1031             : 
    1032        2400 :   if (CSI.empty())
    1033             :     return;
    1034             : 
    1035             :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    1036        2400 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    1037             :   CallingConv::ID CC = MF.getFunction().getCallingConv();
    1038        4800 :   unsigned Count = CSI.size();
    1039             :   (void)CC;
    1040             :   // MachO's compact unwind format relies on all registers being stored in
    1041             :   // pairs.
    1042             :   assert((!produceCompactUnwindFrame(MF) ||
    1043             :           CC == CallingConv::PreserveMost ||
    1044             :           (Count & 1) == 0) &&
    1045             :          "Odd number of callee-saved regs to spill!");
    1046        2400 :   int Offset = AFI->getCalleeSavedStackSize();
    1047             : 
    1048       10084 :   for (unsigned i = 0; i < Count; ++i) {
    1049             :     RegPairInfo RPI;
    1050        7684 :     RPI.Reg1 = CSI[i].getReg();
    1051             : 
    1052             :     assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
    1053             :            AArch64::FPR64RegClass.contains(RPI.Reg1));
    1054        7684 :     RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
    1055             : 
    1056             :     // Add the next reg to the pair if it is in the same register class.
    1057        3842 :     if (i + 1 < Count) {
    1058        5410 :       unsigned NextReg = CSI[i + 1].getReg();
    1059        5036 :       if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
    1060         748 :           (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
    1061        2553 :         RPI.Reg2 = NextReg;
    1062             :     }
    1063             : 
    1064             :     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    1065             :     // list to come in sorted by frame index so that we can issue the store
    1066             :     // pair instructions directly. Assert if we see anything otherwise.
    1067             :     //
    1068             :     // The order of the registers in the list is controlled by
    1069             :     // getCalleeSavedRegs(), so they will always be in-order, as well.
    1070             :     assert((!RPI.isPaired() ||
    1071             :             (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
    1072             :            "Out of order callee saved regs!");
    1073             : 
    1074             :     // MachO's compact unwind format relies on all registers being stored in
    1075             :     // adjacent register pairs.
    1076             :     assert((!produceCompactUnwindFrame(MF) ||
    1077             :             CC == CallingConv::PreserveMost ||
    1078             :             (RPI.isPaired() &&
    1079             :              ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
    1080             :               RPI.Reg1 + 1 == RPI.Reg2))) &&
    1081             :            "Callee-save registers not saved as adjacent register pair!");
    1082             : 
    1083        3842 :     RPI.FrameIdx = CSI[i].getFrameIdx();
    1084             : 
    1085        3842 :     if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
    1086             :       // Round up size of non-pair to pair size if we need to pad the
    1087             :       // callee-save area to ensure 16-byte alignment.
    1088        1029 :       Offset -= 16;
    1089             :       assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
    1090             :       MFI.setObjectAlignment(RPI.FrameIdx, 16);
    1091             :       AFI->setCalleeSaveStackHasFreeSpace(true);
    1092             :     } else
    1093        5626 :       Offset -= RPI.isPaired() ? 16 : 8;
    1094             :     assert(Offset % 8 == 0);
    1095        3842 :     RPI.Offset = Offset / 8;
    1096             :     assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
    1097             :            "Offset out of bounds for LDP/STP immediate");
    1098             : 
    1099        3842 :     RegPairs.push_back(RPI);
    1100        3842 :     if (RPI.isPaired())
    1101             :       ++i;
    1102             :   }
    1103             : }
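// Worked example for the pairing loop above (register set assumed for
// illustration): with CSI = {lr, fp, x19, x20, x21, x22} and
// getCalleeSavedStackSize() == 48, three GPR pairs are formed and Offset walks
// 48 -> 32 -> 16 -> 0, so the recorded RPI.Offset values are +4, +2 and +0 in
// units of 8 bytes. Those are exactly the addImm(+4)/(+2)/(+0) operands shown
// in the stp/ldp sequences of spillCalleeSavedRegisters() and
// restoreCalleeSavedRegisters() below. An odd, unpaired register instead takes
// a padded 16-byte slot via the setCalleeSaveStackHasFreeSpace(true) branch.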
    1104             : 
    1105        1193 : bool AArch64FrameLowering::spillCalleeSavedRegisters(
    1106             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    1107             :     const std::vector<CalleeSavedInfo> &CSI,
    1108             :     const TargetRegisterInfo *TRI) const {
    1109        1193 :   MachineFunction &MF = *MBB.getParent();
    1110        1193 :   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
    1111        1193 :   DebugLoc DL;
    1112             :   SmallVector<RegPairInfo, 8> RegPairs;
    1113             : 
    1114        1193 :   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
    1115        1193 :   const MachineRegisterInfo &MRI = MF.getRegInfo();
    1116             : 
    1117        1193 :   for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
    1118             :        ++RPII) {
    1119        1912 :     RegPairInfo RPI = *RPII;
    1120             :     unsigned Reg1 = RPI.Reg1;
    1121             :     unsigned Reg2 = RPI.Reg2;
    1122             :     unsigned StrOpc;
    1123             : 
    1124             :     // Issue sequence of spills for cs regs.  The first spill may be converted
    1125             :     // to a pre-decrement store later by emitPrologue if the callee-save stack
    1126             :     // area allocation can't be combined with the local stack area allocation.
    1127             :     // For example:
    1128             :     //    stp     x22, x21, [sp, #0]     // addImm(+0)
    1129             :     //    stp     x20, x19, [sp, #16]    // addImm(+2)
    1130             :     //    stp     fp, lr, [sp, #32]      // addImm(+4)
    1131             :     // Rationale: This sequence saves uop updates compared to a sequence of
    1132             :     // pre-increment spills like stp xi,xj,[sp,#-16]!
    1133             :     // Note: Similar rationale and sequence for restores in epilog.
    1134        1912 :     if (RPI.IsGPR)
    1135        1655 :       StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
    1136             :     else
    1137         257 :       StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
    1138             :     DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
    1139             :           if (RPI.isPaired())
    1140             :             dbgs() << ", " << printReg(Reg2, TRI);
    1141             :           dbgs() << ") -> fi#(" << RPI.FrameIdx;
    1142             :           if (RPI.isPaired())
    1143             :             dbgs() << ", " << RPI.FrameIdx+1;
    1144             :           dbgs() << ")\n");
    1145             : 
    1146        3824 :     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    1147        1912 :     if (!MRI.isReserved(Reg1))
    1148        1898 :       MBB.addLiveIn(Reg1);
    1149        1912 :     if (RPI.isPaired()) {
    1150        1277 :       if (!MRI.isReserved(Reg2))
    1151         822 :         MBB.addLiveIn(Reg2);
    1152        1277 :       MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
    1153             :       MIB.addMemOperand(MF.getMachineMemOperand(
    1154             :           MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
    1155        2554 :           MachineMemOperand::MOStore, 8, 8));
    1156             :     }
    1157        1912 :     MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
    1158        1912 :         .addReg(AArch64::SP)
    1159        1912 :         .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
    1160             :         .setMIFlag(MachineInstr::FrameSetup);
    1161             :     MIB.addMemOperand(MF.getMachineMemOperand(
    1162             :         MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
    1163        3824 :         MachineMemOperand::MOStore, 8, 8));
    1164             :   }
    1165        1193 :   return true;
    1166             : }
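// Note on ordering: RegPairs is walked in reverse here, so the pair with the
// lowest offset is emitted first. That makes the first store in the prologue
// the one at [sp, #0], i.e. the instruction that
// convertCalleeSaveRestoreToSPPrePostIncDec() can later rewrite into a
// pre-decrement store (e.g. "stp x22, x21, [sp, #-48]!") when the callee-save
// and local stack allocations cannot be combined.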
    1167             : 
    1168        1207 : bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    1169             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    1170             :     std::vector<CalleeSavedInfo> &CSI,
    1171             :     const TargetRegisterInfo *TRI) const {
    1172        1207 :   MachineFunction &MF = *MBB.getParent();
    1173        1207 :   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
    1174        1207 :   DebugLoc DL;
    1175             :   SmallVector<RegPairInfo, 8> RegPairs;
    1176             : 
    1177        1207 :   if (MI != MBB.end())
    1178             :     DL = MI->getDebugLoc();
    1179             : 
    1180        1207 :   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
    1181             : 
    1182        5067 :   for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
    1183             :        ++RPII) {
    1184        1930 :     RegPairInfo RPI = *RPII;
    1185             :     unsigned Reg1 = RPI.Reg1;
    1186             :     unsigned Reg2 = RPI.Reg2;
    1187             : 
    1188             :     // Issue sequence of restores for cs regs. The last restore may be converted
    1189             :     // to a post-increment load later by emitEpilogue if the callee-save stack
    1190             :     // area allocation can't be combined with the local stack area allocation.
    1191             :     // For example:
    1192             :     //    ldp     fp, lr, [sp, #32]       // addImm(+4)
    1193             :     //    ldp     x20, x19, [sp, #16]     // addImm(+2)
    1194             :     //    ldp     x22, x21, [sp, #0]      // addImm(+0)
    1195             :     // Note: see comment in spillCalleeSavedRegisters()
    1196             :     unsigned LdrOpc;
    1197        1930 :     if (RPI.IsGPR)
    1198        1673 :       LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
    1199             :     else
    1200         257 :       LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
    1201             :     DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
    1202             :           if (RPI.isPaired())
    1203             :             dbgs() << ", " << printReg(Reg2, TRI);
    1204             :           dbgs() << ") -> fi#(" << RPI.FrameIdx;
    1205             :           if (RPI.isPaired())
    1206             :             dbgs() << ", " << RPI.FrameIdx+1;
    1207             :           dbgs() << ")\n");
    1208             : 
    1209        3860 :     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    1210        1930 :     if (RPI.isPaired()) {
    1211        1276 :       MIB.addReg(Reg2, getDefRegState(true));
    1212             :       MIB.addMemOperand(MF.getMachineMemOperand(
    1213             :           MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
    1214        2552 :           MachineMemOperand::MOLoad, 8, 8));
    1215             :     }
    1216        1930 :     MIB.addReg(Reg1, getDefRegState(true))
    1217        1930 :         .addReg(AArch64::SP)
    1218        1930 :         .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
    1219             :         .setMIFlag(MachineInstr::FrameDestroy);
    1220             :     MIB.addMemOperand(MF.getMachineMemOperand(
    1221             :         MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
    1222        3860 :         MachineMemOperand::MOLoad, 8, 8));
    1223             :   }
    1224        1207 :   return true;
    1225             : }
    1226             : 
    1227       13528 : void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
    1228             :                                                 BitVector &SavedRegs,
    1229             :                                                 RegScavenger *RS) const {
    1230             :   // All calls are tail calls in GHC calling conv, and functions have no
    1231             :   // prologue/epilogue.
    1232       27056 :   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    1233             :     return;
    1234             : 
    1235       13524 :   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
    1236             :   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
    1237       13524 :       MF.getSubtarget().getRegisterInfo());
    1238             :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    1239             :   unsigned UnspilledCSGPR = AArch64::NoRegister;
    1240             :   unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
    1241             : 
    1242       13524 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    1243       13524 :   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
    1244             : 
    1245       13524 :   unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
    1246       13524 :                                 ? RegInfo->getBaseRegister()
    1247             :                                 : (unsigned)AArch64::NoRegister;
    1248             : 
    1249             :   unsigned SpillEstimate = SavedRegs.count();
    1250      554800 :   for (unsigned i = 0; CSRegs[i]; ++i) {
    1251      270638 :     unsigned Reg = CSRegs[i];
    1252      270638 :     unsigned PairedReg = CSRegs[i ^ 1];
    1253      270638 :     if (Reg == BasePointerReg)
    1254          14 :       SpillEstimate++;
    1255      320612 :     if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
    1256       48803 :       SpillEstimate++;
    1257             :   }
    1258       13524 :   SpillEstimate += 2; // Conservatively include FP+LR in the estimate
    1259       13524 :   unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
    1260             : 
    1261             :   // The frame record needs to be created by saving the appropriate registers
    1262       13524 :   if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
    1263             :     SavedRegs.set(AArch64::FP);
    1264             :     SavedRegs.set(AArch64::LR);
    1265             :   }
    1266             : 
    1267             :   unsigned ExtraCSSpill = 0;
    1268             :   // Figure out which callee-saved registers to save/restore.
    1269      554800 :   for (unsigned i = 0; CSRegs[i]; ++i) {
    1270      270638 :     const unsigned Reg = CSRegs[i];
    1271             : 
    1272             :     // Add the base pointer register to SavedRegs if it is callee-save.
    1273      270638 :     if (Reg == BasePointerReg)
    1274             :       SavedRegs.set(Reg);
    1275             : 
    1276             :     bool RegUsed = SavedRegs.test(Reg);
    1277      270638 :     unsigned PairedReg = CSRegs[i ^ 1];
    1278      538034 :     if (!RegUsed) {
    1279      694358 :       if (AArch64::GPR64RegClass.contains(Reg) &&
    1280      159566 :           !RegInfo->isReservedReg(MF, Reg)) {
    1281             :         UnspilledCSGPR = Reg;
    1282             :         UnspilledCSGPRPaired = PairedReg;
    1283             :       }
    1284      267396 :       continue;
    1285             :     }
    1286             : 
    1287             :     // MachO's compact unwind format relies on all registers being stored in
    1288             :     // pairs.
    1289             :     // FIXME: the usual format is actually better if unwinding isn't needed.
    1290        4821 :     if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
    1291             :       SavedRegs.set(PairedReg);
    1292         740 :       if (AArch64::GPR64RegClass.contains(PairedReg) &&
    1293         236 :           !RegInfo->isReservedReg(MF, PairedReg))
    1294             :         ExtraCSSpill = PairedReg;
    1295             :     }
    1296             :   }
    1297             : 
    1298             :   DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
    1299             :         for (unsigned Reg : SavedRegs.set_bits())
    1300             :           dbgs() << ' ' << printReg(Reg, RegInfo);
    1301             :         dbgs() << "\n";);
    1302             : 
    1303             :   // If any callee-saved registers are used, the frame cannot be eliminated.
    1304             :   unsigned NumRegsSpilled = SavedRegs.count();
    1305             :   bool CanEliminateFrame = NumRegsSpilled == 0;
    1306             : 
    1307             :   // The CSR spill slots have not been allocated yet, so estimateStackSize
    1308             :   // won't include them.
    1309       13524 :   unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
    1310             :   DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
    1311       13524 :   unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
    1312       13524 :   bool BigStack = (CFSize > EstimatedStackSizeLimit);
    1313       13524 :   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    1314             :     AFI->setHasStackFrame(true);
    1315             : 
    1316             :   // Estimate if we might need to scavenge a register at some point in order
    1317             :   // to materialize a stack offset. If so, either spill one additional
    1318             :   // callee-saved register or reserve a special spill slot to facilitate
    1319             :   // register scavenging. If we already spilled an extra callee-saved register
    1320             :   // above to keep the number of spills even, we don't need to do anything else
    1321             :   // here.
    1322       13524 :   if (BigStack) {
    1323          47 :     if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
    1324             :       DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
    1325             :                    << " to get a scratch register.\n");
    1326             :       SavedRegs.set(UnspilledCSGPR);
    1327             :       // MachO's compact unwind format relies on all registers being stored in
    1328             :       // pairs, so if we need to spill one extra for BigStack, then we need to
    1329             :       // store the pair.
    1330          35 :       if (produceCompactUnwindFrame(MF))
    1331             :         SavedRegs.set(UnspilledCSGPRPaired);
    1332             :       ExtraCSSpill = UnspilledCSGPRPaired;
    1333             :       NumRegsSpilled = SavedRegs.count();
    1334             :     }
    1335             : 
    1336             :     // If we didn't find an extra callee-saved register to spill, create
    1337             :     // an emergency spill slot.
    1338          47 :     if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
    1339          11 :       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    1340             :       const TargetRegisterClass &RC = AArch64::GPR64RegClass;
    1341             :       unsigned Size = TRI->getSpillSize(RC);
    1342             :       unsigned Align = TRI->getSpillAlignment(RC);
    1343          11 :       int FI = MFI.CreateStackObject(Size, Align, false);
    1344             :       RS->addScavengingFrameIndex(FI);
    1345             :       DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
    1346             :                    << " as the emergency spill slot.\n");
    1347             :     }
    1348             :   }
    1349             : 
    1350             :   // Round up to register pair alignment to avoid additional SP adjustment
    1351             :   // instructions.
    1352       27048 :   AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
    1353             : }
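// Minimal standalone sketch of the final rounding step above (the helper name
// is an assumption; llvm::alignTo(Bytes, 16) behaves like this for the
// power-of-two alignment used here):

static unsigned calleeSavedAreaSize(unsigned NumRegsSpilled) {
  unsigned Bytes = 8 * NumRegsSpilled; // one 8-byte slot per spilled register
  return (Bytes + 15u) & ~15u;         // round up to a 16-byte multiple
}

// For example, three spilled registers occupy 24 bytes but reserve 32; the
// unused 8-byte slot is the free space that computeCalleeSaveRegisterPairs()
// records via setCalleeSaveStackHasFreeSpace(true) and that
// enableStackSlotScavenging() below exposes for stack slot scavenging.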
    1354             : 
    1355         407 : bool AArch64FrameLowering::enableStackSlotScavenging(
    1356             :     const MachineFunction &MF) const {
    1357             :   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    1358         407 :   return AFI->hasCalleeSaveStackFreeSpace();
    1359      245058 : }

Generated by: LCOV version 1.13