LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64FrameLowering.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 450 453 99.3 %
Date: 2017-09-14 15:23:50 Functions: 23 23 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains the AArch64 implementation of TargetFrameLowering class.
      11             : //
      12             : // On AArch64, stack frames are structured as follows:
      13             : //
      14             : // The stack grows downward.
      15             : //
      16             : // All of the individual frame areas on the frame below are optional, i.e. it's
      17             : // possible to create a function so that the particular area isn't present
      18             : // in the frame.
      19             : //
      20             : // At function entry, the "frame" looks as follows:
      21             : //
      22             : // |                                   | Higher address
      23             : // |-----------------------------------|
      24             : // |                                   |
      25             : // | arguments passed on the stack     |
      26             : // |                                   |
      27             : // |-----------------------------------| <- sp
      28             : // |                                   | Lower address
      29             : //
      30             : //
      31             : // After the prologue has run, the frame has the following general structure.
      32             : // Note that this doesn't depict the case where a red-zone is used. Also,
      33             : // technically the last frame area (VLAs) doesn't get created until the
      34             : // main function body, after the prologue has run. However, it's depicted here
      35             : // for completeness.
      36             : //
      37             : // |                                   | Higher address
      38             : // |-----------------------------------|
      39             : // |                                   |
      40             : // | arguments passed on the stack     |
      41             : // |                                   |
      42             : // |-----------------------------------|
      43             : // |                                   |
      44             : // | (Win64 only) varargs from reg     |
      45             : // |                                   |
      46             : // |-----------------------------------|
      47             : // |                                   |
      48             : // | prev_fp, prev_lr                  |
      49             : // | (a.k.a. "frame record")           |
      50             : // |-----------------------------------| <- fp(=x29)
      51             : // |                                   |
      52             : // | other callee-saved registers      |
      53             : // |                                   |
      54             : // |-----------------------------------|
      55             : // |.empty.space.to.make.part.below....|
      56             : // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
      57             : // |.the.standard.16-byte.alignment....|  compile time; if present)
      58             : // |-----------------------------------|
      59             : // |                                   |
      60             : // | local variables of fixed size     |
      61             : // | including spill slots             |
      62             : // |-----------------------------------| <- bp(not defined by ABI,
      63             : // |.variable-sized.local.variables....|       LLVM chooses X19)
      64             : // |.(VLAs)............................| (size of this area is unknown at
      65             : // |...................................|  compile time)
      66             : // |-----------------------------------| <- sp
      67             : // |                                   | Lower address
      68             : //
      69             : //
      70             : // To access the data in a frame, a constant offset to it must be computable
      71             : // at compile time from one of the pointers (fp, bp, sp). The size
      72             : // of the areas with a dotted background cannot be computed at compile-time
      73             : // if they are present, making it required to have all three of fp, bp and
      74             : // sp to be set up to be able to access all contents in the frame areas,
      75             : // assuming all of the frame areas are non-empty.
      76             : //
      77             : // For most functions, some of the frame areas are empty. For those functions,
      78             : // it may not be necessary to set up fp or bp:
      79             : // * A base pointer is definitely needed when there are both VLAs and local
      80             : //   variables with more-than-default alignment requirements.
      81             : // * A frame pointer is definitely needed when there are local variables with
      82             : //   more-than-default alignment requirements.
      83             : //
      84             : // In some cases when a base pointer is not strictly needed, it is generated
      85             : // anyway when offsets from the frame pointer to access local variables become
      86             : // so large that the offset can't be encoded in the immediate fields of loads
      87             : // or stores.
      88             : //
      89             : // FIXME: also explain the redzone concept.
      90             : // FIXME: also explain the concept of reserved call frames.
      91             : //
      92             : //===----------------------------------------------------------------------===//
      93             : 
      94             : #include "AArch64FrameLowering.h"
      95             : #include "AArch64InstrInfo.h"
      96             : #include "AArch64MachineFunctionInfo.h"
      97             : #include "AArch64RegisterInfo.h"
      98             : #include "AArch64Subtarget.h"
      99             : #include "AArch64TargetMachine.h"
     100             : #include "llvm/ADT/SmallVector.h"
     101             : #include "llvm/ADT/Statistic.h"
     102             : #include "llvm/CodeGen/LivePhysRegs.h"
     103             : #include "llvm/CodeGen/MachineBasicBlock.h"
     104             : #include "llvm/CodeGen/MachineFrameInfo.h"
     105             : #include "llvm/CodeGen/MachineFunction.h"
     106             : #include "llvm/CodeGen/MachineInstr.h"
     107             : #include "llvm/CodeGen/MachineInstrBuilder.h"
     108             : #include "llvm/CodeGen/MachineMemOperand.h"
     109             : #include "llvm/CodeGen/MachineModuleInfo.h"
     110             : #include "llvm/CodeGen/MachineOperand.h"
     111             : #include "llvm/CodeGen/MachineRegisterInfo.h"
     112             : #include "llvm/CodeGen/RegisterScavenging.h"
     113             : #include "llvm/IR/Attributes.h"
     114             : #include "llvm/IR/CallingConv.h"
     115             : #include "llvm/IR/DataLayout.h"
     116             : #include "llvm/IR/DebugLoc.h"
     117             : #include "llvm/IR/Function.h"
     118             : #include "llvm/MC/MCDwarf.h"
     119             : #include "llvm/Support/CommandLine.h"
     120             : #include "llvm/Support/Debug.h"
     121             : #include "llvm/Support/ErrorHandling.h"
     122             : #include "llvm/Support/MathExtras.h"
     123             : #include "llvm/Support/raw_ostream.h"
     124             : #include "llvm/Target/TargetInstrInfo.h"
     125             : #include "llvm/Target/TargetMachine.h"
     126             : #include "llvm/Target/TargetOptions.h"
     127             : #include "llvm/Target/TargetRegisterInfo.h"
     128             : #include "llvm/Target/TargetSubtargetInfo.h"
     129             : #include <cassert>
     130             : #include <cstdint>
     131             : #include <iterator>
     132             : #include <vector>
     133             : 
     134             : using namespace llvm;
     135             : 
     136             : #define DEBUG_TYPE "frame-info"
     137             : 
                       // Hidden command-line switch, off by default, that permits use of the red
                       // zone; it is consulted by canUseRedZone().
     138       72306 : static cl::opt<bool> EnableRedZone("aarch64-redzone",
     139      216918 :                                    cl::desc("enable use of redzone on AArch64"),
     140      289224 :                                    cl::init(false), cl::Hidden);
     141             : 
                       // Counter bumped by emitPrologue() when a function relies on the red zone
                       // instead of adjusting SP.
     142             : STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
     143             : 
     144             : /// Look at each instruction that references stack frames and return the stack
     145             : /// size limit beyond which some of these instructions will require a scratch
     146             : /// register during their expansion later.
                       ///
                       /// \returns 0 as soon as an instruction with a frame-index operand reports
                       /// AArch64FrameOffsetCannotUpdate for a zero offset, and 255 otherwise.
     147       11825 : static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
     148             :   // FIXME: For now, just conservatively guesstimate based on unscaled indexing
     149             :   // range. We'll end up allocating an unnecessary spill slot a lot, but
     150             :   // realistically that's not a big deal at this stage of the game.
     151       50043 :   for (MachineBasicBlock &MBB : MF) {
     152      178859 :     for (MachineInstr &MI : MBB) {
                             // Debug values, pseudo instructions and the two ADD-immediate
                             // opcodes listed here are not examined.
     153      181546 :       if (MI.isDebugValue() || MI.isPseudo() ||
     154      113752 :           MI.getOpcode() == AArch64::ADDXri ||
     155             :           MI.getOpcode() == AArch64::ADDSXri)
     156        6867 :         continue;
     157             : 
                             // Only instructions that carry at least one frame-index operand are
                             // queried for offset legality.
     158      199891 :       for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     159      439410 :         if (!MI.getOperand(i).isFI())
     160      143499 :           continue;
     161             : 
     162        2971 :         int Offset = 0;
     163        2971 :         if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
     164             :             AArch64FrameOffsetCannotUpdate)
     165          11 :           return 0;
     166             :       }
     167             :     }
     168             :   }
     169             :   return 255;
     170             : }
     171             : 
                       /// canUseRedZone - Return true if \p MF may use the red zone. This requires
                       /// the aarch64-redzone option to be set, the function not to carry the
                       /// NoRedZone attribute, and the function to make no calls, need no frame
                       /// pointer and have a local stack size of at most 128 bytes.
     172       15267 : bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
     173       15267 :   if (!EnableRedZone)
     174             :     return false;
     175             :   // Don't use the red zone if the function explicitly asks us not to.
     176             :   // This is typically used for kernel code.
     177         142 :   if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
     178             :     return false;
     179             : 
     180          71 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     181          71 :   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     182          71 :   unsigned NumBytes = AFI->getLocalStackSize();
     183             : 
                         // Any call, a frame pointer, or more than 128 bytes of locals rules the
                         // red zone out.
     184          71 :   return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
     185             : }
     186             : 
     187             : /// hasFP - Return true if the specified function should have a dedicated frame
     188             : /// pointer register.
                       ///
                       /// That is the case when the function makes calls while frame-pointer
                       /// elimination is disabled, or when it has variable-sized objects, has its
                       /// frame address taken, contains a stack map or patch point, or needs stack
                       /// realignment.
     189       86899 : bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
     190       86899 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     191       86899 :   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
     192             :   // Retain behavior of always omitting the FP for leaf functions when possible.
     193       10707 :   return (MFI.hasCalls() &&
     194       95859 :           MF.getTarget().Options.DisableFramePointerElim(MF)) ||
     195       84649 :          MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
     196      255195 :          MFI.hasStackMap() || MFI.hasPatchPoint() ||
     197      170835 :          RegInfo->needsStackRealignment(MF);
     198             : }
     199             : 
     200             : /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
     201             : /// not required, we reserve argument space for call sites in the function
     202             : /// immediately on entry to the current function.  This eliminates the need for
     203             : /// add/sub sp brackets around call sites.  Returns true if the call frame is
     204             : /// included as part of the stack frame.
                       ///
                       /// In this implementation the call frame is reserved exactly when the
                       /// function has no variable-sized objects.
     205             : bool
     206        8944 : AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
     207        8944 :   return !MF.getFrameInfo().hasVarSizedObjects();
     208             : }
     209             : 
                       /// eliminateCallFramePseudoInstr - Replace the call-frame setup/destroy
                       /// pseudo instruction at \p I by an explicit SP adjustment where one is
                       /// needed, then erase the pseudo.
                       ///
                       /// \returns the iterator produced by erasing \p I from \p MBB.
     210        3410 : MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
     211             :     MachineFunction &MF, MachineBasicBlock &MBB,
     212             :     MachineBasicBlock::iterator I) const {
     213             :   const AArch64InstrInfo *TII =
     214        3410 :       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
     215       10230 :   DebugLoc DL = I->getDebugLoc();
     216        6820 :   unsigned Opc = I->getOpcode();
     217        3410 :   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
                         // Only the destroy pseudo's second operand is read as a callee-pop
                         // amount; for the setup pseudo the amount is taken to be zero.
     218        5115 :   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
     219             : 
     220        3410 :   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
     221        3410 :   if (!TFI->hasReservedCallFrame(MF)) {
     222         124 :     unsigned Align = getStackAlignment();
     223             : 
                           // The adjustment is the first operand rounded up to the stack
                           // alignment; it is negated for the setup pseudo.
     224         124 :     int64_t Amount = I->getOperand(0).getImm();
     225         248 :     Amount = alignTo(Amount, Align);
     226         124 :     if (!IsDestroy)
     227          62 :       Amount = -Amount;
     228             : 
     229             :     // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
     230             :     // doesn't have to pop anything), then the first operand will be zero too so
     231             :     // this adjustment is a no-op.
     232         124 :     if (CalleePopAmount == 0) {
     233             :       // FIXME: in-function stack adjustment for calls is limited to 24-bits
     234             :       // because there's no guaranteed temporary register available.
     235             :       //
     236             :       // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
     237             :       // 1) For offset <= 12-bit, we use LSL #0
     238             :       // 2) For 12-bit < offset <= 24-bit, we use two instructions. One uses
     239             :       // LSL #0, and the other uses LSL #12.
     240             :       //
     241             :       // Most call frames will be allocated at the start of a function so
     242             :       // this is OK, but it is a limitation that needs dealing with.
     243             :       assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
     244         123 :       emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
     245             :     }
     246        3286 :   } else if (CalleePopAmount != 0) {
     247             :     // If the calling convention demands that the callee pops arguments from the
     248             :     // stack, we want to add it back if we have a reserved call frame.
     249             :     assert(CalleePopAmount < 0xffffff && "call frame too large");
     250          12 :     emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
     251             :                     TII);
     252             :   }
     253        6820 :   return MBB.erase(I);
     254             : }
     255             : 
                       /// emitCalleeSavedFrameMoves - Insert, before \p MBBI, one CFI offset
                       /// instruction per callee-saved register recorded in the frame info. Each
                       /// inserted instruction is flagged FrameSetup. Nothing is emitted when no
                       /// callee-saved registers are recorded.
     256         884 : void AArch64FrameLowering::emitCalleeSavedFrameMoves(
     257             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
     258         884 :   MachineFunction &MF = *MBB.getParent();
     259         884 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     260         884 :   const TargetSubtargetInfo &STI = MF.getSubtarget();
     261         884 :   const MCRegisterInfo *MRI = STI.getRegisterInfo();
     262         884 :   const TargetInstrInfo *TII = STI.getInstrInfo();
     263        1768 :   DebugLoc DL = MBB.findDebugLoc(MBBI);
     264             : 
     265             :   // Add callee saved registers to move list.
     266         884 :   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
     267         884 :   if (CSI.empty())
     268           0 :     return;
     269             : 
     270        2947 :   for (const auto &Info : CSI) {
     271        2063 :     unsigned Reg = Info.getReg();
                           // The recorded offset is the register's frame-object offset minus the
                           // offset of the local area.
     272             :     int64_t Offset =
     273        4126 :         MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
     274        2063 :     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
     275             :     unsigned CFIIndex = MF.addFrameInst(
     276        8252 :         MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
     277        6189 :     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
     278        2063 :         .addCFIIndex(CFIIndex)
     279        2063 :         .setMIFlags(MachineInstr::FrameSetup);
     280             :   }
     281             : }
     282             : 
     283             : // Find a scratch register that we can use at the start of the prologue to
     284             : // re-align the stack pointer.  We avoid using callee-save registers since they
     285             : // may appear to be free when this is called from canUseAsPrologue (during
     286             : // shrink wrapping), but then no longer be free when this is called from
     287             : // emitPrologue.
     288             : //
     289             : // FIXME: This is a bit conservative, since in the above case we could use one
     290             : // of the callee-save registers as a scratch temp to re-align the stack pointer,
     291             : // but we would then have to make sure that we were in fact saving at least one
     292             : // callee-save register in the prologue, which is additional complexity that
     293             : // doesn't seem worth the benefit.
                       //
                       // Returns X9 for the entry block or whenever X9 is available, otherwise the
                       // first available register of the GPR64 class, or AArch64::NoRegister if
                       // none is available.
     294          26 : static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
     295          26 :   MachineFunction *MF = MBB->getParent();
     296             : 
     297             :   // If MBB is an entry block, use X9 as the scratch register
     298          26 :   if (&MF->front() == MBB)
     299             :     return AArch64::X9;
     300             : 
                         // Start from the block's live-in registers.
     301           9 :   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
     302           9 :   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
     303          18 :   LivePhysRegs LiveRegs(TRI);
     304           9 :   LiveRegs.addLiveIns(*MBB);
     305             : 
     306             :   // Mark callee saved registers as used so we will not choose them.
     307           9 :   const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
     308         189 :   for (unsigned i = 0; CSRegs[i]; ++i)
     309         180 :     LiveRegs.addReg(CSRegs[i]);
     310             : 
     311             :   // Prefer X9 since it was historically used for the prologue scratch reg.
     312           9 :   const MachineRegisterInfo &MRI = MF->getRegInfo();
     313           9 :   if (LiveRegs.available(MRI, AArch64::X9))
     314             :     return AArch64::X9;
     315             : 
                         // Otherwise take the first GPR64 register that is available.
     316          43 :   for (unsigned Reg : AArch64::GPR64RegClass) {
     317          36 :     if (LiveRegs.available(MRI, Reg))
     318             :       return Reg;
     319             :   }
     320             :   return AArch64::NoRegister;
     321             : }
     322             : 
                       /// canUseAsPrologue - Return true if \p MBB can be used as a prologue block.
                       /// Any block qualifies when the stack needs no realignment; otherwise the
                       /// block qualifies only if a scratch register can be found for it.
     323          76 : bool AArch64FrameLowering::canUseAsPrologue(
     324             :     const MachineBasicBlock &MBB) const {
     325          76 :   const MachineFunction *MF = MBB.getParent();
                         // findScratchNonCalleeSaveRegister() takes a non-const block pointer.
     326          76 :   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
     327          76 :   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
     328          76 :   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     329             : 
     330             :   // Don't need a scratch register if we're not going to re-align the stack.
     331          76 :   if (!RegInfo->needsStackRealignment(*MF))
     332             :     return true;
     333             :   // Otherwise, we can use any block as long as it has a scratch register
     334             :   // available.
     335           7 :   return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
     336             : }
     337             : 
                       /// shouldCombineCSRLocalStackBump - Return true if the callee-save area and
                       /// the local stack area should be allocated with one combined SP bump of
                       /// \p StackBumpBytes. This requires a non-zero local stack size, a bump
                       /// below 512 bytes, no variable-sized objects, no stack realignment and no
                       /// use of the red zone.
     338       13052 : bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
     339             :     MachineFunction &MF, unsigned StackBumpBytes) const {
     340       13052 :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     341       13052 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     342       13052 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     343       13052 :   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     344             : 
                         // With no locals there is no second bump to fold in.
     345       13052 :   if (AFI->getLocalStackSize() == 0)
     346             :     return false;
     347             : 
     348             :   // 512 is the maximum immediate for stp/ldp that will be used for
     349             :   // callee-save save/restores
     350        1213 :   if (StackBumpBytes >= 512)
     351             :     return false;
     352             : 
     353        1174 :   if (MFI.hasVarSizedObjects())
     354             :     return false;
     355             : 
     356        1142 :   if (RegInfo->needsStackRealignment(MF))
     357             :     return false;
     358             : 
     359             :   // This isn't strictly necessary, but it simplifies things a bit since the
     360             :   // current RedZone handling code assumes the SP is adjusted by the
     361             :   // callee-save save/restore code.
     362        1120 :   if (canUseRedZone(MF))
     363             :     return false;
     364             : 
     365        1116 :   return true;
     366             : }
     367             : 
     368             : // Convert callee-save register save/restore instruction to do stack pointer
     369             : // decrement/increment to allocate/deallocate the callee-save stack area by
     370             : // converting store/load to use pre/post increment version.
                       //
                       // A replacement instruction is built in front of MBBI and the original is
                       // erased. CSStackSizeInc is the SP change in bytes and must be a multiple
                       // of 8. Returns an iterator to the replacement instruction.
     371        1434 : static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
     372             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     373             :     const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
     374             :   unsigned NewOpc;
                         // Set for the single-register forms, whose immediate is left in bytes;
                         // the paired forms get the immediate divided by 8 below.
     375        1434 :   bool NewIsUnscaled = false;
                         // Stores map to their pre-indexed form, loads to their post-indexed form.
     376        2868 :   switch (MBBI->getOpcode()) {
     377           0 :   default:
     378           0 :     llvm_unreachable("Unexpected callee-save save/restore opcode!");
     379             :   case AArch64::STPXi:
     380             :     NewOpc = AArch64::STPXpre;
     381             :     break;
     382          35 :   case AArch64::STPDi:
     383          35 :     NewOpc = AArch64::STPDpre;
     384          35 :     break;
     385         252 :   case AArch64::STRXui:
     386         252 :     NewOpc = AArch64::STRXpre;
     387         252 :     NewIsUnscaled = true;
     388         252 :     break;
     389          34 :   case AArch64::STRDui:
     390          34 :     NewOpc = AArch64::STRDpre;
     391          34 :     NewIsUnscaled = true;
     392          34 :     break;
     393         387 :   case AArch64::LDPXi:
     394         387 :     NewOpc = AArch64::LDPXpost;
     395         387 :     break;
     396          35 :   case AArch64::LDPDi:
     397          35 :     NewOpc = AArch64::LDPDpost;
     398          35 :     break;
     399         265 :   case AArch64::LDRXui:
     400         265 :     NewOpc = AArch64::LDRXpost;
     401         265 :     NewIsUnscaled = true;
     402         265 :     break;
     403          34 :   case AArch64::LDRDui:
     404          34 :     NewOpc = AArch64::LDRDpost;
     405          34 :     NewIsUnscaled = true;
     406          34 :     break;
     407             :   }
     408             : 
                         // The replacement additionally defines SP as its first operand.
     409        2868 :   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
     410        1434 :   MIB.addReg(AArch64::SP, RegState::Define);
     411             : 
     412             :   // Copy all operands other than the immediate offset.
     413        1434 :   unsigned OpndIdx = 0;
     414        5151 :   for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
     415             :        ++OpndIdx)
     416       11151 :     MIB.add(MBBI->getOperand(OpndIdx));
     417             : 
     418             :   assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
     419             :          "Unexpected immediate offset in first/last callee-save save/restore "
     420             :          "instruction!");
     421             :   assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
     422             :          "Unexpected base register in callee-save save/restore instruction!");
     423             :   // Last operand is immediate offset that needs fixing.
     424             :   assert(CSStackSizeInc % 8 == 0);
     425        1434 :   int64_t CSStackSizeIncImm = CSStackSizeInc;
     426        1434 :   if (!NewIsUnscaled)
     427         849 :     CSStackSizeIncImm /= 8;
     428        1434 :   MIB.addImm(CSStackSizeIncImm);
     429             : 
                         // Carry over the flags and memory operands of the original instruction.
     430        2868 :   MIB.setMIFlags(MBBI->getFlags());
     431        5736 :   MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
     432             : 
                         // erase() yields the instruction after the erased one; the replacement
                         // sits immediately before it.
     433        2868 :   return std::prev(MBB.erase(MBBI));
     434             : }
     435             : 
     436             : // Fixup callee-save register save/restore instructions to take into account
     437             : // combined SP bump by adding the local stack size to the stack offsets.
                       //
                       // LocalStackSize is given in bytes and must be a multiple of 8; it is added
                       // to the instruction's last explicit operand in units of 8 bytes.
     438        1482 : static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
     439             :                                               unsigned LocalStackSize) {
     440        1482 :   unsigned Opc = MI.getOpcode();
                         // Opc is only read by the assert below; the cast keeps it "used" when
                         // asserts are compiled out.
     441             :   (void)Opc;
     442             :   assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
     443             :           Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
     444             :           Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
     445             :           Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
     446             :          "Unexpected callee-save save/restore opcode!");
     447             : 
     448        1482 :   unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
     449             :   assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
     450             :          "Unexpected base register in callee-save save/restore instruction!");
     451             :   // Last operand is immediate offset that needs fixing.
     452        2964 :   MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
     453             :   // All generated opcodes have scaled offsets.
     454             :   assert(LocalStackSize % 8 == 0);
     455        2964 :   OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
     456        1482 : }
     457             : 
     458       11661 : void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     459             :                                         MachineBasicBlock &MBB) const {
     460       11661 :   MachineBasicBlock::iterator MBBI = MBB.begin();
     461       11661 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     462       11661 :   const Function *Fn = MF.getFunction();
     463       11661 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     464       11661 :   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     465       11661 :   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
     466       11661 :   MachineModuleInfo &MMI = MF.getMMI();
     467       11661 :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     468       11661 :   bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
     469       11661 :   bool HasFP = hasFP(MF);
     470             : 
     471             :   // Debug location must be unknown since the first debug location is used
     472             :   // to determine the end of the prologue.
     473       12807 :   DebugLoc DL;
     474             : 
     475             :   // All calls are tail calls in GHC calling conv, and functions have no
     476             :   // prologue/epilogue.
     477       23322 :   if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
     478       10515 :     return;
     479             : 
     480       11657 :   int NumBytes = (int)MFI.getStackSize();
     481       11657 :   if (!AFI->hasStackFrame()) {
     482             :     assert(!HasFP && "unexpected function without stack frame but with FP");
     483             : 
     484             :     // All of the stack allocation is for locals.
     485       21022 :     AFI->setLocalStackSize(NumBytes);
     486             : 
     487       10511 :     if (!NumBytes)
     488             :       return;
     489             :     // REDZONE: If the stack size is less than 128 bytes, we don't need
     490             :     // to actually allocate.
     491         224 :     if (canUseRedZone(MF))
     492             :       ++NumRedZoneFunctions;
     493             :     else {
     494         222 :       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
     495             :                       MachineInstr::FrameSetup);
     496             : 
     497             :       // Label used to tie together the PROLOG_LABEL and the MachineMoves.
     498         222 :       MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
     499             :       // Encode the stack size of the leaf function.
     500             :       unsigned CFIIndex = MF.addFrameInst(
     501         888 :           MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
     502         666 :       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
     503         222 :           .addCFIIndex(CFIIndex)
     504         222 :           .setMIFlags(MachineInstr::FrameSetup);
     505             :     }
     506             :     return;
     507             :   }
     508             : 
     509             :   bool IsWin64 =
     510        3377 :       Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
     511        1092 :   unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
     512             : 
     513        1146 :   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
     514             :   // All of the remaining stack allocations are for locals.
     515        2292 :   AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
     516             : 
     517        1146 :   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
     518        1146 :   if (CombineSPBump) {
     519         433 :     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
     520             :                     MachineInstr::FrameSetup);
     521         433 :     NumBytes = 0;
     522         713 :   } else if (PrologueSaveSize != 0) {
     523         713 :     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
     524         713 :                                                      -PrologueSaveSize);
     525         713 :     NumBytes -= PrologueSaveSize;
     526             :   }
     527             :   assert(NumBytes >= 0 && "Negative stack allocation size!?");
     528             : 
     529             :   // Move past the saves of the callee-saved registers, fixing up the offsets
     530             :   // and pre-inc if we decided to combine the callee-save and local stack
     531             :   // pointer bump above.
     532        1146 :   MachineBasicBlock::iterator End = MBB.end();
     533        5941 :   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
     534        1826 :     if (CombineSPBump)
     535        1472 :       fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
     536             :     ++MBBI;
     537             :   }
     538        1146 :   if (HasFP) {
     539             :     // Only set up FP if we actually need to. Frame pointer is fp =
     540             :     // sp - fixedobject - 16.
     541         276 :     int FPOffset = AFI->getCalleeSavedStackSize() - 16;
     542         276 :     if (CombineSPBump)
     543         102 :       FPOffset += AFI->getLocalStackSize();
     544             : 
     545             :     // Issue    sub fp, sp, FPOffset or
     546             :     //          mov fp,sp          when FPOffset is zero.
     547             :     // Note: All stores of callee-saved registers are marked as "FrameSetup".
     548             :     // This code marks the instruction(s) that set the FP also.
     549         276 :     emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
     550             :                     MachineInstr::FrameSetup);
     551             :   }
     552             : 
     553             :   // Allocate space for the rest of the frame.
     554        1146 :   if (NumBytes) {
     555          48 :     const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
     556          48 :     unsigned scratchSPReg = AArch64::SP;
     557             : 
     558          48 :     if (NeedsRealignment) {
     559          19 :       scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
     560             :       assert(scratchSPReg != AArch64::NoRegister);
     561             :     }
     562             : 
     563             :     // If we're a leaf function, try using the red zone.
     564          48 :     if (!canUseRedZone(MF))
     565             :       // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
     566             :       // the correct value here, as NumBytes also includes padding bytes,
     567             :       // which shouldn't be counted here.
     568          47 :       emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
     569             :                       MachineInstr::FrameSetup);
     570             : 
     571          48 :     if (NeedsRealignment) {
     572          19 :       const unsigned Alignment = MFI.getMaxAlignment();
     573          19 :       const unsigned NrBitsToZero = countTrailingZeros(Alignment);
     574             :       assert(NrBitsToZero > 1);
     575             :       assert(scratchSPReg != AArch64::SP);
     576             : 
     577             :       // SUB X9, SP, NumBytes
     578             :       //   -- X9 is temporary register, so shouldn't contain any live data here,
     579             :       //   -- free to use. This is already produced by emitFrameOffset above.
     580             :       // AND SP, X9, 0b11111...0000
     581             :       // The logical immediates have a non-trivial encoding. The following
     582             :       // formula computes the encoded immediate with all ones but
     583             :       // NrBitsToZero zero bits as least significant bits.
     584          19 :       uint32_t andMaskEncoded = (1 << 12)                         // = N
     585          19 :                                 | ((64 - NrBitsToZero) << 6)      // immr
     586          19 :                                 | ((64 - NrBitsToZero - 1) << 0); // imms
     587             : 
     588          57 :       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
     589          19 :           .addReg(scratchSPReg, RegState::Kill)
     590          38 :           .addImm(andMaskEncoded);
     591             :       AFI->setStackRealigned(true);
     592             :     }
     593             :   }
     594             : 
     595             :   // If we need a base pointer, set it up here. It's whatever the value of the
     596             :   // stack pointer is at this point. Any variable size objects will be allocated
     597             :   // after this, so we can still use the base pointer to reference locals.
     598             :   //
     599             :   // FIXME: Clarify FrameSetup flags here.
     600             :   // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
     601             :   // needed.
     602        1146 :   if (RegInfo->hasBasePointer(MF)) {
     603          14 :     TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
     604          14 :                      false);
     605             :   }
     606             : 
     607        1146 :   if (needsFrameMoves) {
     608         884 :     const DataLayout &TD = MF.getDataLayout();
     609         884 :     const int StackGrowth = -TD.getPointerSize(0);
     610         884 :     unsigned FramePtr = RegInfo->getFrameRegister(MF);
     611             :     // An example of the prologue:
     612             :     //
     613             :     //     .globl __foo
     614             :     //     .align 2
     615             :     //  __foo:
     616             :     // Ltmp0:
     617             :     //     .cfi_startproc
     618             :     //     .cfi_personality 155, ___gxx_personality_v0
     619             :     // Leh_func_begin:
     620             :     //     .cfi_lsda 16, Lexception33
     621             :     //
     622             :     //     stp  xa,bx, [sp, -#offset]!
     623             :     //     ...
     624             :     //     stp  x28, x27, [sp, #offset-32]
     625             :     //     stp  fp, lr, [sp, #offset-16]
     626             :     //     add  fp, sp, #offset - 16
     627             :     //     sub  sp, sp, #1360
     628             :     //
     629             :     // The Stack:
     630             :     //       +-------------------------------------------+
     631             :     // 10000 | ........ | ........ | ........ | ........ |
     632             :     // 10004 | ........ | ........ | ........ | ........ |
     633             :     //       +-------------------------------------------+
     634             :     // 10008 | ........ | ........ | ........ | ........ |
     635             :     // 1000c | ........ | ........ | ........ | ........ |
     636             :     //       +===========================================+
     637             :     // 10010 |                X28 Register               |
     638             :     // 10014 |                X28 Register               |
     639             :     //       +-------------------------------------------+
     640             :     // 10018 |                X27 Register               |
     641             :     // 1001c |                X27 Register               |
     642             :     //       +===========================================+
     643             :     // 10020 |                Frame Pointer              |
     644             :     // 10024 |                Frame Pointer              |
     645             :     //       +-------------------------------------------+
     646             :     // 10028 |                Link Register              |
     647             :     // 1002c |                Link Register              |
     648             :     //       +===========================================+
     649             :     // 10030 | ........ | ........ | ........ | ........ |
     650             :     // 10034 | ........ | ........ | ........ | ........ |
     651             :     //       +-------------------------------------------+
     652             :     // 10038 | ........ | ........ | ........ | ........ |
     653             :     // 1003c | ........ | ........ | ........ | ........ |
     654             :     //       +-------------------------------------------+
     655             :     //
     656             :     //     [sp] = 10030        ::    >>initial value<<
     657             :     //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
     658             :     //     fp = sp == 10020    ::  mov fp, sp
     659             :     //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
     660             :     //     sp == 10010         ::    >>final value<<
     661             :     //
     662             :     // The frame pointer (w29) points to address 10020. If we use an offset of
     663             :     // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
     664             :     // for w27, and -32 for w28:
     665             :     //
     666             :     //  Ltmp1:
     667             :     //     .cfi_def_cfa w29, 16
     668             :     //  Ltmp2:
     669             :     //     .cfi_offset w30, -8
     670             :     //  Ltmp3:
     671             :     //     .cfi_offset w29, -16
     672             :     //  Ltmp4:
     673             :     //     .cfi_offset w27, -24
     674             :     //  Ltmp5:
     675             :     //     .cfi_offset w28, -32
     676             : 
     677         884 :     if (HasFP) {
     678             :       // Define the current CFA rule to use the provided FP.
     679         170 :       unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
     680         510 :       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
     681         340 :           nullptr, Reg, 2 * StackGrowth - FixedObject));
     682         510 :       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
     683         170 :           .addCFIIndex(CFIIndex)
     684         170 :           .setMIFlags(MachineInstr::FrameSetup);
     685             :     } else {
     686             :       // Encode the stack size of the leaf function.
     687             :       unsigned CFIIndex = MF.addFrameInst(
     688        2856 :           MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
     689        2142 :       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
     690         714 :           .addCFIIndex(CFIIndex)
     691         714 :           .setMIFlags(MachineInstr::FrameSetup);
     692             :     }
     693             : 
     694             :     // Now emit the moves for whatever callee saved regs we have (including FP,
     695             :     // LR if those are saved).
     696         884 :     emitCalleeSavedFrameMoves(MBB, MBBI);
     697             :   }
     698             : }
     699             : 
     700       11910 : void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     701             :                                         MachineBasicBlock &MBB) const {
                         // Inserts the stack-pointer adjustments that tear down the frame in MBB.
                         // Every emitFrameOffset call below is tagged MachineInstr::FrameDestroy.
                         // Depending on the frame shape this is either one combined SP update, or
                         // a local-area release followed by a separate argument pop.
                         //
                         // MBBI is the last non-debug instruction of the block; it supplies the
                         // debug location and tells us whether the block ends in a tail call.
     702       11910 :   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
     703       11910 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     704       11910 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     705       11910 :   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
     706       12085 :   DebugLoc DL;
     707       11910 :   bool IsTailCallReturn = false;
     708       23820 :   if (MBB.end() != MBBI) {
     709       23820 :     DL = MBBI->getDebugLoc();
     710       23820 :     unsigned RetOpcode = MBBI->getOpcode();
     711       11910 :     IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
     712             :       RetOpcode == AArch64::TCRETURNri;
     713             :   }
                         // NumBytes starts as the full stack size reported by MachineFrameInfo and
                         // is reduced below as parts of the frame are accounted for.
     714       11910 :   int NumBytes = MFI.getStackSize();
     715       11910 :   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     716             : 
     717             :   // All calls are tail calls in GHC calling conv, and functions have no
     718             :   // prologue/epilogue.
     719       23820 :   if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
     720       11735 :     return;
     721             : 
     722             :   // Initial and residual are named for consistency with the prologue. Note that
     723             :   // in the epilogue, the residual adjustment is executed first.
                         // NOTE(review): no variable named "initial" or "residual" exists in this
                         // function; the sentence above looks stale -- confirm and reword.
     724       11906 :   uint64_t ArgumentPopSize = 0;
     725       11906 :   if (IsTailCallReturn) {
     726         168 :     MachineOperand &StackAdjust = MBBI->getOperand(1);
     727             : 
     728             :     // For a tail-call in a callee-pops-arguments environment, some or all of
     729             :     // the stack may actually be in use for the call's arguments, this is
     730             :     // calculated during LowerCall and consumed here...
     731         168 :     ArgumentPopSize = StackAdjust.getImm();
     732             :   } else {
     733             :     // ... otherwise the amount to pop is *all* of the argument space,
     734             :     // conveniently stored in the MachineFunctionInfo by
     735             :     // LowerFormalArguments. This will, of course, be zero for the C calling
     736             :     // convention.
     737       11738 :     ArgumentPopSize = AFI->getArgumentStackToRestore();
     738             :   }
     739             : 
     740             :   // The stack frame should be like below,
     741             :   //
     742             :   //      ----------------------                     ---
     743             :   //      |                    |                      |
     744             :   //      | BytesInStackArgArea|              CalleeArgStackSize
     745             :   //      | (NumReusableBytes) |                (of tail call)
     746             :   //      |                    |                     ---
     747             :   //      |                    |                      |
     748             :   //      ---------------------|        ---           |
     749             :   //      |                    |         |            |
     750             :   //      |   CalleeSavedReg   |         |            |
     751             :   //      | (CalleeSavedStackSize)|      |            |
     752             :   //      |                    |         |            |
     753             :   //      ---------------------|         |         NumBytes
     754             :   //      |                    |     StackSize  (StackAdjustUp)
     755             :   //      |   LocalStackSize   |         |            |
     756             :   //      | (covering callee   |         |            |
     757             :   //      |       args)        |         |            |
     758             :   //      |                    |         |            |
     759             :   //      ----------------------        ---          ---
     760             :   //
     761             :   // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
     762             :   //             = StackSize + ArgumentPopSize
     763             :   //
     764             :   // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
     765             :   // it as the 2nd argument of AArch64ISD::TC_RETURN.
                         // NOTE(review): in the code below the NumBytes variable holds only
                         // MFI.getStackSize(); ArgumentPopSize is added separately where it is
                         // needed. The "NumBytes" in the diagram is therefore the total adjustment,
                         // not this variable.
     766             : 
     767             :   bool IsWin64 =
     768       35624 :       Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
     769       11840 :   unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
     770             : 
                         // Same computation as in emitPrologue: callee-saved area plus, for Win64,
                         // the 16-byte-aligned vararg GPR save size.
     771       11906 :   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
     772       11906 :   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
     773             : 
                         // Not combining: hand the instruction just before the first terminator to
                         // the helper together with +PrologueSaveSize (presumably it folds the SP
                         // increment into that restore -- confirm against the helper).
     774       11906 :   if (!CombineSPBump && PrologueSaveSize != 0)
     775         721 :     convertCalleeSaveRestoreToSPPrePostIncDec(
     776        1442 :         MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);
     777             : 
     778             :   // Move past the restores of the callee-saved registers.
                         // The walk goes backwards from the first terminator while instructions
                         // carry the FrameDestroy flag. On exit LastPopI is the earliest instruction
                         // of that flagged run, or the first terminator if nothing was flagged.
                         // When the SP bump is combined, each flagged instruction is passed to
                         // fixupCalleeSaveRestoreStackOffset with the local stack size.
     779       11906 :   MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
     780       11906 :   MachineBasicBlock::iterator Begin = MBB.begin();
     781       13751 :   while (LastPopI != Begin) {
     782       13277 :     --LastPopI;
     783       13277 :     if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
     784             :       ++LastPopI;
     785             :       break;
     786        1845 :     } else if (CombineSPBump)
     787        1492 :       fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
     788             :   }
     789             : 
     790             :   // If there is a single SP update, insert it before the ret and we're done.
     791       11906 :   if (CombineSPBump) {
     792         683 :     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
     793         683 :                     NumBytes + ArgumentPopSize, TII,
     794             :                     MachineInstr::FrameDestroy);
     795         683 :     return;
     796             :   }
     797             : 
                         // From here on NumBytes covers only what lies below the callee-save area.
     798       11223 :   NumBytes -= PrologueSaveSize;
     799             :   assert(NumBytes >= 0 && "Negative stack allocation size!?");
     800             : 
     801       11223 :   if (!hasFP(MF)) {
     802       11051 :     bool RedZone = canUseRedZone(MF);
     803             :     // If this was a redzone leaf function, we don't need to restore the
     804             :     // stack pointer (but we may need to pop stack args for fastcc).
     805       11051 :     if (RedZone && ArgumentPopSize == 0)
     806             :       return;
     807             : 
                         // With no callee-save area, the argument pop is folded into the same SP
                         // adjustment as the locals, which is inserted at LastPopI.
     808       10999 :     bool NoCalleeSaveRestore = PrologueSaveSize == 0;
     809       10999 :     int StackRestoreBytes = RedZone ? 0 : NumBytes;
     810       10999 :     if (NoCalleeSaveRestore)
     811       10450 :       StackRestoreBytes += ArgumentPopSize;
     812       10999 :     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
     813             :                     StackRestoreBytes, TII, MachineInstr::FrameDestroy);
     814             :     // If we were able to combine the local stack pop with the argument pop,
     815             :     // then we're done.
     816       10999 :     if (NoCalleeSaveRestore || ArgumentPopSize == 0)
     817             :       return;
                         // The local area was already released by the call above; only the
                         // argument pop at the end of the function remains.
     818             :     NumBytes = 0;
     819             :   }
     820             : 
     821             :   // Restore the original stack pointer.
     822             :   // FIXME: Rather than doing the math here, we should instead just use
     823             :   // non-post-indexed loads for the restores if we aren't actually going to
     824             :   // be able to save any instructions.
                         // With variable-sized objects or a realigned stack, SP is recomputed from
                         // FP using the offset 16 - CalleeSavedStackSize; otherwise the remaining
                         // NumBytes are simply added back to SP.
     825         175 :   if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
     826          48 :     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
     827          48 :                     -AFI->getCalleeSavedStackSize() + 16, TII,
     828             :                     MachineInstr::FrameDestroy);
     829         127 :   else if (NumBytes)
     830           5 :     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
     831             :                     MachineInstr::FrameDestroy);
     832             : 
     833             :   // This must be placed after the callee-save restore code because that code
     834             :   // assumes the SP is at the same location as it was after the callee-save save
     835             :   // code in the prologue.
     836         175 :   if (ArgumentPopSize)
     837           3 :     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
     838             :                     ArgumentPopSize, TII, MachineInstr::FrameDestroy);
     839             : }
     840             : 
     841             : /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
     842             : /// debug info.  It's the same as what we use for resolving the code-gen
     843             : /// references for now.  FIXME: This can go wrong when references are
     844             : /// SP-relative and simple call frames aren't used.
                       ///
                       /// \param MF       function whose frame is being queried.
                       /// \param FI       frame index of the slot to resolve.
                       /// \param FrameReg [out] base register chosen by resolveFrameIndexReference.
                       /// \returns the offset produced by resolveFrameIndexReference for that base.
     845          14 : int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
     846             :                                                  int FI,
     847             :                                                  unsigned &FrameReg) const {
                         // PreferFP is not passed here, so whatever default the declaration gives
                         // it applies (presumably false -- confirm in the header).
     848          14 :   return resolveFrameIndexReference(MF, FI, FrameReg);
     849             : }
     850             : 
     851        3193 : int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
     852             :                                                      int FI, unsigned &FrameReg,
     853             :                                                      bool PreferFP) const {
                         // Picks the base register for frame index FI, stores it in FrameReg, and
                         // returns the offset of the object relative to that register. The base is
                         // the frame register, the base pointer, or SP, decided below.
                         // PreferFP biases the choice towards the frame register for non-fixed
                         // objects when the frame register is otherwise eligible.
     854        3193 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     855             :   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
     856        3193 :       MF.getSubtarget().getRegisterInfo());
     857        3193 :   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     858        3193 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     859             :   bool IsWin64 =
     860        9150 :       Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
     861        2842 :   unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
                         // Two candidate offsets are computed up front:
                         //   FPOffset - object offset + FixedObject + 16, returned with the frame
                         //              register (the 16 presumably covers the saved FP/LR pair --
                         //              confirm).
                         //   Offset   - object offset + total stack size, returned with SP or the
                         //              base pointer.
     862        3193 :   int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
     863        3193 :   int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
     864        6386 :   bool isFixed = MFI.isFixedObjectIndex(FI);
     865             : 
     866             :   // Use frame pointer to reference fixed objects. Use it for locals if
     867             :   // there are VLAs or a dynamically realigned SP (and thus the SP isn't
     868             :   // reliable as a base). Make sure useFPForScavengingIndex() does the
     869             :   // right thing for the emergency spill slot.
     870        3193 :   bool UseFP = false;
     871        3193 :   if (AFI->hasStackFrame()) {
     872             :     // Note: Keeping the following as multiple 'if' statements rather than
     873             :     // merging to a single expression for readability.
     874             :     //
     875             :     // Argument access should always use the FP.
     876        2387 :     if (isFixed) {
     877         194 :       UseFP = hasFP(MF);
     878        2753 :     } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
     879         560 :                !RegInfo->needsStackRealignment(MF)) {
     880             :       // Use SP or FP, whichever gives us the best chance of the offset
     881             :       // being in range for direct access. If the FPOffset is positive,
     882             :       // that'll always be best, as the SP will be even further away.
     883             :       // If the FPOffset is negative, we have to keep in mind that the
     884             :       // available offset range for negative offsets is smaller than for
     885             :       // positive ones. If we have variable sized objects, we're stuck with
     886             :       // using the FP regardless, though, as the SP offset is unknown
     887             :       // and we don't have a base pointer available. If an offset is
     888             :       // available via the FP and the SP, use whichever is closest.
     889         486 :       if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
     890         405 :           (FPOffset >= -256 && Offset > -FPOffset))
     891             :         UseFP = true;
     892             :     }
     893             :   }
     894             : 
     895             :   assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
     896             :          "In the presence of dynamic stack pointer realignment, "
     897             :          "non-argument objects cannot be accessed through the frame pointer");
     898             : 
     899         194 :   if (UseFP) {
     900         354 :     FrameReg = RegInfo->getFrameRegister(MF);
     901         354 :     return FPOffset;
     902             :   }
     903             : 
     904             :   // Use the base pointer if we have one.
                         // In that case Offset is returned unchanged; the red-zone correction below
                         // is applied only when SP is the base.
     905        2839 :   if (RegInfo->hasBasePointer(MF))
     906          15 :     FrameReg = RegInfo->getBaseRegister();
     907             :   else {
     908        2824 :     FrameReg = AArch64::SP;
     909             :     // If we're using the red zone for this function, the SP won't actually
     910             :     // be adjusted, so the offsets will be negative. They're also all
     911             :     // within range of the signed 9-bit immediate instructions.
     912        2824 :     if (canUseRedZone(MF))
     913           3 :       Offset -= AFI->getLocalStackSize();
     914             :   }
     915             : 
     916             :   return Offset;
     917             : }
     918             : 
     919             : static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
                         // Returns the register-state flags for a use of Reg: the result of
                         // getKillRegState(true) when Reg is not a live-in of MF, and of
                         // getKillRegState(false) when it is.
     920             :   // Do not set a kill flag on values that are also marked as live-in. This
     921             :   // happens with the @llvm-returnaddress intrinsic and with arguments passed in
     922             :   // callee saved registers.
     923             :   // Omitting the kill flags is conservatively correct even if the live-in
     924             :   // is not used after all.
     925        3064 :   bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
     926        3064 :   return getKillRegState(!IsLiveIn);
     927             : }
     928             : 
     929        3152 : static bool produceCompactUnwindFrame(MachineFunction &MF) {
                         // True only for MachO targets, and then only unless both of these hold:
                         // the target lowering reports swifterror support, and some attribute set
                         // of the function carries Attribute::SwiftError.
                         // The asserts in computeCalleeSaveRegisterPairs use this to decide whether
                         // callee-saves must come in adjacent pairs.
     930        3152 :   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     931        3152 :   AttributeList Attrs = MF.getFunction()->getAttributes();
     932        4756 :   return Subtarget.isTargetMachO() &&
     933        3208 :          !(Subtarget.getTargetLowering()->supportSwiftError() &&
     934        4756 :            Attrs.hasAttrSomewhere(Attribute::SwiftError));
     935             : }
     936             : 
     937             : namespace {
     938             : 
                       // One callee-save spill/restore unit: a single register or a pair of
                       // registers. Entries are filled in by computeCalleeSaveRegisterPairs.
                       // NOTE(review): FrameIdx, Offset and IsGPR have no default initializer, so
                       // they are indeterminate after default construction until assigned.
     939             : struct RegPairInfo {
                         // First register of the unit; NoRegister until assigned.
     940             :   unsigned Reg1 = AArch64::NoRegister;
                         // Second register when the unit is a pair; NoRegister otherwise.
     941             :   unsigned Reg2 = AArch64::NoRegister;
                         // Frame index of Reg1's slot (taken from the CalleeSavedInfo entry).
     942             :   int FrameIdx;
                         // Slot offset in units of 8 bytes (the byte offset divided by 8).
     943             :   int Offset;
                         // True when Reg1 is in GPR64RegClass; otherwise it is an FPR64 register.
     944             :   bool IsGPR;
     945             : 
     946        3671 :   RegPairInfo() = default;
     947             : 
                         // A unit is a pair exactly when a second register has been recorded.
     948             :   bool isPaired() const { return Reg2 != AArch64::NoRegister; }
     949             : };
     950             : 
     951             : } // end anonymous namespace
     952             : 
     953        2306 : static void computeCalleeSaveRegisterPairs(
     954             :     MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
     955             :     const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
                         // Groups the callee-saved registers in CSI into RegPairInfo entries and
                         // appends them to RegPairs. Two consecutive registers of the same class
                         // (both GPR64 or both FPR64) form a pair; otherwise the register stands
                         // alone. Each entry gets the frame index of its first register and an
                         // offset in 8-byte units. TRI is not referenced in this body.
     956             : 
     957        2306 :   if (CSI.empty())
     958             :     return;
     959             : 
     960        2306 :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     961        2306 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     962        4612 :   CallingConv::ID CC = MF.getFunction()->getCallingConv();
     963        4612 :   unsigned Count = CSI.size();
                         // CC is only read inside asserts; the cast keeps builds without asserts
                         // from warning about an unused variable.
     964             :   (void)CC;
     965             :   // MachO's compact unwind format relies on all registers being stored in
     966             :   // pairs.
     967             :   assert((!produceCompactUnwindFrame(MF) ||
     968             :           CC == CallingConv::PreserveMost ||
     969             :           (Count & 1) == 0) &&
     970             :          "Odd number of callee-saved regs to spill!");
                         // Byte offset bookkeeping: starts at the size of the callee-save area and
                         // is decremented for every entry, so the first entry gets the highest
                         // offset.
     971        2306 :   int Offset = AFI->getCalleeSavedStackSize();
     972             : 
     973        5977 :   for (unsigned i = 0; i < Count; ++i) {
     974        3671 :     RegPairInfo RPI;
     975        7342 :     RPI.Reg1 = CSI[i].getReg();
     976             : 
     977             :     assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
     978             :            AArch64::FPR64RegClass.contains(RPI.Reg1));
     979        7342 :     RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
     980             : 
     981             :     // Add the next reg to the pair if it is in the same register class.
     982        3671 :     if (i + 1 < Count) {
     983        5212 :       unsigned NextReg = CSI[i + 1].getReg();
     984        4862 :       if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
     985         700 :           (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
     986        2476 :         RPI.Reg2 = NextReg;
     987             :     }
     988             : 
     989             :     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
     990             :     // list to come in sorted by frame index so that we can issue the store
     991             :     // pair instructions directly. Assert if we see anything otherwise.
     992             :     //
     993             :     // The order of the registers in the list is controlled by
     994             :     // getCalleeSavedRegs(), so they will always be in-order, as well.
     995             :     assert((!RPI.isPaired() ||
     996             :             (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
     997             :            "Out of order callee saved regs!");
     998             : 
     999             :     // MachO's compact unwind format relies on all registers being stored in
    1000             :     // adjacent register pairs.
    1001             :     assert((!produceCompactUnwindFrame(MF) ||
    1002             :             CC == CallingConv::PreserveMost ||
    1003             :             (RPI.isPaired() &&
    1004             :              ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
    1005             :               RPI.Reg1 + 1 == RPI.Reg2))) &&
    1006             :            "Callee-save registers not saved as adjacent register pair!");
    1007             : 
    1008        3671 :     RPI.FrameIdx = CSI[i].getFrameIdx();
    1009             : 
                           // Count * 8 differs from the callee-save area size when that area
                           // contains padding. In that case an unpaired register is given a full
                           // 16-byte slot: its frame object is realigned to 16 and the function
                           // info is told the callee-save area has free space.
    1010        3671 :     if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
    1011             :       // Round up size of non-pair to pair size if we need to pad the
    1012             :       // callee-save area to ensure 16-byte alignment.
    1013         979 :       Offset -= 16;
    1014             :       assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
    1015        1958 :       MFI.setObjectAlignment(RPI.FrameIdx, 16);
    1016             :       AFI->setCalleeSaveStackHasFreeSpace(true);
    1017             :     } else
    1018        5384 :       Offset -= RPI.isPaired() ? 16 : 8;
    1019             :     assert(Offset % 8 == 0);
                           // Stored scaled down by 8; the assert below checks it stays in -64..63.
    1020        3671 :     RPI.Offset = Offset / 8;
    1021             :     assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
    1022             :            "Offset out of bounds for LDP/STP immediate");
    1023             : 
    1024        3671 :     RegPairs.push_back(RPI);
                           // A pair consumed CSI[i + 1] as well, so skip it in the next iteration.
    1025        3671 :     if (RPI.isPaired())
    1026        2476 :       ++i;
    1027             :   }
    1028             : }
    1029             : 
    1030        1146 : bool AArch64FrameLowering::spillCalleeSavedRegisters( // Inserts the callee-saved register stores before MI in MBB; always returns true.
    1031             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    1032             :     const std::vector<CalleeSavedInfo> &CSI,
    1033             :     const TargetRegisterInfo *TRI) const {
    1034        1146 :   MachineFunction &MF = *MBB.getParent();
    1035        1146 :   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
    1036        2292 :   DebugLoc DL; // Never assigned here, so BuildMI receives a default-constructed DebugLoc.
    1037        2292 :   SmallVector<RegPairInfo, 8> RegPairs;
    1038             : 
    1039        1146 :   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); // Groups CSI into single/paired entries with frame index and scaled offset.
    1040        1146 :   const MachineRegisterInfo &MRI = MF.getRegInfo();
    1041             : 
    1042        5264 :   for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; // Reverse walk: later entries were given lower offsets, so stores go from the lowest offset upward.
    1043             :        ++RPII) {
    1044        1826 :     RegPairInfo RPI = *RPII;
    1045        1826 :     unsigned Reg1 = RPI.Reg1;
    1046        1826 :     unsigned Reg2 = RPI.Reg2;
    1047             :     unsigned StrOpc;
    1048             : 
    1049             :     // Issue sequence of spills for cs regs.  The first spill may be converted
    1050             :     // to a pre-decrement store later by emitPrologue if the callee-save stack
    1051             :     // area allocation can't be combined with the local stack area allocation.
    1052             :     // For example:
    1053             :     //    stp     x22, x21, [sp, #0]     // addImm(+0)
    1054             :     //    stp     x20, x19, [sp, #16]    // addImm(+2)
    1055             :     //    stp     fp, lr, [sp, #32]      // addImm(+4)
    1056             :     // Rationale: This sequence saves uop updates compared to a sequence of
    1057             :     // pre-increment spills like stp xi,xj,[sp,#-16]!
    1058             :     // Note: Similar rationale and sequence for restores in epilog.
    1059        1826 :     if (RPI.IsGPR) // GPR64 entries use STPXi/STRXui; all other entries use STPDi/STRDui.
    1060        1593 :       StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
    1061             :     else
    1062         233 :       StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
    1063             :     DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
    1064             :           if (RPI.isPaired())
    1065             :             dbgs() << ", " << TRI->getName(Reg2);
    1066             :           dbgs() << ") -> fi#(" << RPI.FrameIdx;
    1067             :           if (RPI.isPaired())
    1068             :             dbgs() << ", " << RPI.FrameIdx+1;
    1069             :           dbgs() << ")\n");
    1070             : 
    1071        3652 :     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    1072        1826 :     if (!MRI.isReserved(Reg1)) // Reserved registers are not added to the block's live-in list.
    1073        1812 :       MBB.addLiveIn(Reg1);
    1074        1826 :     if (RPI.isPaired()) {
    1075        1238 :       if (!MRI.isReserved(Reg2))
    1076         794 :         MBB.addLiveIn(Reg2);
    1077        1238 :       MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); // For a pair, Reg2 is added as the first register operand, ahead of Reg1.
    1078        1238 :       MIB.addMemOperand(MF.getMachineMemOperand( // Reg2's memory operand refers to the slot at FrameIdx + 1.
    1079             :           MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
    1080        2476 :           MachineMemOperand::MOStore, 8, 8));
    1081             :     }
    1082        1826 :     MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
    1083        1826 :         .addReg(AArch64::SP)
    1084        3652 :         .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
    1085        1826 :         .setMIFlag(MachineInstr::FrameSetup); // Tag the store as frame-setup code.
    1086        1826 :     MIB.addMemOperand(MF.getMachineMemOperand(
    1087             :         MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
    1088        3652 :         MachineMemOperand::MOStore, 8, 8));
    1089             :   }
    1090        2292 :   return true;
    1091             : }
    1092             : 
    1093        1160 : bool AArch64FrameLowering::restoreCalleeSavedRegisters( // Inserts the callee-saved register reloads before MI in MBB; always returns true.
    1094             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    1095             :     std::vector<CalleeSavedInfo> &CSI,
    1096             :     const TargetRegisterInfo *TRI) const {
    1097        1160 :   MachineFunction &MF = *MBB.getParent();
    1098        1160 :   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
    1099        2320 :   DebugLoc DL;
    1100        2320 :   SmallVector<RegPairInfo, 8> RegPairs;
    1101             : 
    1102        2320 :   if (MI != MBB.end()) // Only dereference MI when it points at a real instruction; otherwise DL stays default-constructed.
    1103        1113 :     DL = MI->getDebugLoc();
    1104             : 
    1105        1160 :   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); // Same pairing and offsets as used by spillCalleeSavedRegisters().
    1106             : 
    1107        4165 :   for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE; // Forward walk: highest offset first, the reverse of the spill order.
    1108             :        ++RPII) {
    1109        1845 :     RegPairInfo RPI = *RPII;
    1110        1845 :     unsigned Reg1 = RPI.Reg1;
    1111        1845 :     unsigned Reg2 = RPI.Reg2;
    1112             : 
    1113             :     // Issue sequence of restores for cs regs. The last restore may be converted
    1114             :     // to a post-increment load later by emitEpilogue if the callee-save stack
    1115             :     // area allocation can't be combined with the local stack area allocation.
    1116             :     // For example:
    1117             :     //    ldp     fp, lr, [sp, #32]       // addImm(+4)
    1118             :     //    ldp     x20, x19, [sp, #16]     // addImm(+2)
    1119             :     //    ldp     x22, x21, [sp, #0]      // addImm(+0)
    1120             :     // Note: see comment in spillCalleeSavedRegisters()
    1121             :     unsigned LdrOpc;
    1122        1845 :     if (RPI.IsGPR) // GPR64 entries use LDPXi/LDRXui; all other entries use LDPDi/LDRDui.
    1123        1612 :       LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
    1124             :     else
    1125         233 :       LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
    1126             :     DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
    1127             :           if (RPI.isPaired())
    1128             :             dbgs() << ", " << TRI->getName(Reg2);
    1129             :           dbgs() << ") -> fi#(" << RPI.FrameIdx;
    1130             :           if (RPI.isPaired())
    1131             :             dbgs() << ", " << RPI.FrameIdx+1;
    1132             :           dbgs() << ")\n");
    1133             : 
    1134        3690 :     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    1135        1845 :     if (RPI.isPaired()) {
    1136        1238 :       MIB.addReg(Reg2, getDefRegState(true)); // For a pair, Reg2 is added first, as a definition, ahead of Reg1.
    1137        1238 :       MIB.addMemOperand(MF.getMachineMemOperand( // Reg2's memory operand refers to the slot at FrameIdx + 1.
    1138             :           MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
    1139        2476 :           MachineMemOperand::MOLoad, 8, 8));
    1140             :     }
    1141        1845 :     MIB.addReg(Reg1, getDefRegState(true)) // The reloaded register is a definition of the load.
    1142        1845 :         .addReg(AArch64::SP)
    1143        3690 :         .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
    1144        1845 :         .setMIFlag(MachineInstr::FrameDestroy); // Tag the load as frame-destroy code.
    1145        1845 :     MIB.addMemOperand(MF.getMachineMemOperand(
    1146             :         MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
    1147        3690 :         MachineMemOperand::MOLoad, 8, 8));
    1148             :   }
    1149        2320 :   return true;
    1150             : }
    1151             : 
    1152       11829 : void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // Chooses the callee-saved registers to spill (SavedRegs) and records the callee-save area size in AArch64FunctionInfo.
    1153             :                                                 BitVector &SavedRegs, // In/out: seeded by the base-class call, then extended below.
    1154             :                                                 RegScavenger *RS) const { // Receives the emergency spill slot index when one is created.
    1155             :   // All calls are tail calls in GHC calling conv, and functions have no
    1156             :   // prologue/epilogue.
    1157       23658 :   if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    1158             :     return;
    1159             : 
    1160       11825 :   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
    1161             :   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
    1162       11825 :       MF.getSubtarget().getRegisterInfo());
    1163       11825 :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    1164       11825 :   unsigned UnspilledCSGPR = AArch64::NoRegister; // Last non-reserved callee-saved GPR64 seen that is not in SavedRegs.
    1165       11825 :   unsigned UnspilledCSGPRPaired = AArch64::NoRegister; // Its neighbour in the callee-saved register list.
    1166             : 
    1167             :   // The frame record needs to be created by saving the appropriate registers
    1168       11825 :   if (hasFP(MF)) {
    1169         276 :     SavedRegs.set(AArch64::FP);
    1170             :     SavedRegs.set(AArch64::LR);
    1171             :   }
    1172             : 
    1173       11825 :   unsigned BasePointerReg = AArch64::NoRegister;
    1174       11825 :   if (RegInfo->hasBasePointer(MF))
    1175          14 :     BasePointerReg = RegInfo->getBaseRegister();
    1176             : 
    1177       11825 :   unsigned ExtraCSSpill = 0; // Non-zero once a register has been saved that may serve as scavenging scratch.
    1178       11825 :   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
    1179             :   // Figure out which callee-saved registers to save/restore.
    1180      248484 :   for (unsigned i = 0; CSRegs[i]; ++i) { // The list is terminated by a zero entry.
    1181      236659 :     const unsigned Reg = CSRegs[i];
    1182             : 
    1183             :     // Add the base pointer register to SavedRegs if it is callee-save.
    1184      236659 :     if (Reg == BasePointerReg)
    1185             :       SavedRegs.set(Reg);
    1186             : 
    1187      236659 :     bool RegUsed = SavedRegs.test(Reg);
    1188      236659 :     unsigned PairedReg = CSRegs[i ^ 1]; // Even index pairs with the next list entry, odd index with the previous one.
    1189      470199 :     if (!RegUsed) {
    1190      606346 :       if (AArch64::GPR64RegClass.contains(Reg) &&
    1191      139266 :           !RegInfo->isReservedReg(MF, Reg)) {
    1192             :         UnspilledCSGPR = Reg;
    1193             :         UnspilledCSGPRPaired = PairedReg;
    1194             :       }
    1195      233540 :       continue;
    1196             :     }
    1197             : 
    1198             :     // MachO's compact unwind format relies on all registers being stored in
    1199             :     // pairs.
    1200             :     // FIXME: the usual format is actually better if unwinding isn't needed.
    1201        4636 :     if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
    1202         246 :       SavedRegs.set(PairedReg);
    1203         723 :       if (AArch64::GPR64RegClass.contains(PairedReg) &&
    1204         231 :           !RegInfo->isReservedReg(MF, PairedReg))
    1205             :         ExtraCSSpill = PairedReg; // Saved only to complete the pair, so it is a scratch candidate.
    1206             :     }
    1207             :   }
    1208             : 
    1209             :   DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
    1210             :         for (unsigned Reg : SavedRegs.set_bits())
    1211             :           dbgs() << ' ' << PrintReg(Reg, RegInfo);
    1212             :         dbgs() << "\n";);
    1213             : 
    1214             :   // If any callee-saved registers are used, the frame cannot be eliminated.
    1215       11825 :   unsigned NumRegsSpilled = SavedRegs.count();
    1216       11825 :   bool CanEliminateFrame = NumRegsSpilled == 0;
    1217             : 
    1218             :   // The CSR spill slots have not been allocated yet, so estimateStackSize
    1219             :   // won't include them.
    1220       11825 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    1221       11825 :   unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; // Adds 8 bytes for each register in SavedRegs.
    1222             :   DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
    1223       11825 :   unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
    1224       11825 :   bool BigStack = (CFSize > EstimatedStackSizeLimit);
    1225       11825 :   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    1226             :     AFI->setHasStackFrame(true);
    1227             : 
    1228             :   // Estimate if we might need to scavenge a register at some point in order
    1229             :   // to materialize a stack offset. If so, either spill one additional
    1230             :   // callee-saved register or reserve a special spill slot to facilitate
    1231             :   // register scavenging. If we already spilled an extra callee-saved register
    1232             :   // above to keep the number of spills even, we don't need to do anything else
    1233             :   // here.
    1234       11825 :   if (BigStack) {
    1235          45 :     if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
    1236             :       DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
    1237             :             << " to get a scratch register.\n");
    1238          33 :       SavedRegs.set(UnspilledCSGPR);
    1239             :       // MachO's compact unwind format relies on all registers being stored in
    1240             :       // pairs, so if we need to spill one extra for BigStack, then we need to
    1241             :       // store the pair.
    1242          33 :       if (produceCompactUnwindFrame(MF))
    1243             :         SavedRegs.set(UnspilledCSGPRPaired);
    1244          33 :       ExtraCSSpill = UnspilledCSGPR; // Record the register actually spilled as scratch; the pair is only saved for compact unwind and may be NoRegister or in use, so it must not drive the check below.
    1245             :       NumRegsSpilled = SavedRegs.count();
    1246             :     }
    1247             : 
    1248             :     // If we didn't find an extra callee-saved register to spill, create
    1249             :     // an emergency spill slot.
    1250          45 :     if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { // Also taken when the extra register is itself used in the function.
    1251          11 :       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    1252          11 :       const TargetRegisterClass &RC = AArch64::GPR64RegClass;
    1253          11 :       unsigned Size = TRI->getSpillSize(RC);
    1254          22 :       unsigned Align = TRI->getSpillAlignment(RC);
    1255          11 :       int FI = MFI.CreateStackObject(Size, Align, false);
    1256             :       RS->addScavengingFrameIndex(FI); // NOTE(review): RS is dereferenced unconditionally; assumes callers pass a non-null scavenger - confirm.
    1257             :       DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
    1258             :                    << " as the emergency spill slot.\n");
    1259             :     }
    1260             :   }
    1261             : 
    1262             :   // Round up to register pair alignment to avoid additional SP adjustment
    1263             :   // instructions.
    1264       23650 :   AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
    1265             : }
    1266             : 
    1267         379 : bool AArch64FrameLowering::enableStackSlotScavenging( // Returns the "callee-save area has free space" flag stored in this function's AArch64FunctionInfo.
    1268             :     const MachineFunction &MF) const {
    1269         379 :   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    1270         379 :   return AFI->hasCalleeSaveStackFreeSpace(); // Presumably the flag set via setCalleeSaveStackHasFreeSpace(true) in computeCalleeSaveRegisterPairs() - confirm.
    1271      216918 : }

Generated by: LCOV version 1.13