//===-- X86FrameLowering.cpp - X86 Frame Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

using namespace llvm;

// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified.  Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  return hasReservedCallFrame(MF) ||
         (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function? Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the name - it resolves call frame setup/destroy
// pseudos that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool
X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
  return MF.getFrameInfo()->hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.  This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
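/// For example, a function that takes a frame address (so
/// MFI->isFrameAddressTaken() is set) or that contains a stackmap or
/// patchpoint must keep a dedicated frame pointer.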
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineModuleInfo &MMI = MF.getMMI();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();

  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          RegInfo->needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MMI.callsUnwindInit() || MMI.callsEHReturn() ||
          MFI->hasStackMap() || MFI->hasPatchPoint());
}

static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::SUB64ri8;
    return X86::SUB64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::SUB32ri8;
    return X86::SUB32ri;
  }
}

static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::ADD64ri8;
    return X86::ADD64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::ADD32ri8;
    return X86::ADD32ri;
  }
}

static unsigned getSUBrrOpcode(unsigned isLP64) {
  return isLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(unsigned isLP64) {
  return isLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::AND64ri8;
    return X86::AND64ri32;
  }
  if (isInt<8>(Imm))
    return X86::AND32ri8;
  return X86::AND32ri;
}

static unsigned getLEArOpcode(unsigned IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

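// For example, getSUBriOpcode(/*IsLP64=*/true, 8) selects SUB64ri8 because
// the immediate fits in a signed 8-bit field, whereas an immediate of 4096
// selects SUB64ri32.
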
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worrying about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       const TargetRegisterInfo &TRI,
                                       bool Is64Bit) {
  const MachineFunction *MF = MBB.getParent();
  const Function *F = MF->getFunction();
  if (!F || MF->getMMI().callsEHReturn())
    return 0;

  static const uint16_t CallerSavedRegs32Bit[] = {
    X86::EAX, X86::EDX, X86::ECX, 0
  };

  static const uint16_t CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8,  X86::R9,  X86::R10, X86::R11, 0
  };

  unsigned Opc = MBBI->getOpcode();
  switch (Opc) {
  default: return 0;
  case X86::RETL:
  case X86::RETQ:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
    for (; *CS; ++CS)
      if (!Uses.count(*CS))
        return *CS;
  }
  }

  return 0;
}

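// Check whether EAX or any register aliasing it (RAX, AX, AH, AL) is a
// live-in of the function.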
static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    unsigned Reg = II->first;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
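/// Offsets too large for a 32-bit immediate are materialized into a scratch
/// register and applied with a single register-register SUB/ADD; e.g. a 4 GiB
/// adjustment becomes 'movabsq $0x100000000, %rax; subq %rax, %rsp'.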
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    unsigned StackPtr, int64_t NumBytes,
                                    bool Is64BitTarget, bool Is64BitStackPtr,
                                    bool UseLEA, const TargetInstrInfo &TII,
                                    const TargetRegisterInfo &TRI) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc;
  if (UseLEA)
    Opc = getLEArOpcode(Is64BitStackPtr);
  else
    Opc = isSub
      ? getSUBriOpcode(Is64BitStackPtr, Offset)
      : getADDriOpcode(Is64BitStackPtr, Offset);

  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  while (Offset) {
    if (Offset > Chunk) {
      // Rather than emit a long series of instructions for large offsets,
      // load the offset into a register and do one sub/add.
      unsigned Reg = 0;

      if (isSub && !isEAXLiveIn(*MBB.getParent()))
        Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX);
      else
        Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);

      if (Reg) {
        Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
        BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
          .addImm(Offset);
        Opc = isSub
          ? getSUBrrOpcode(Is64BitTarget)
          : getADDrrOpcode(Is64BitTarget);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
          .addReg(StackPtr)
          .addReg(Reg);
        MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
        Offset = 0;
        continue;
      }
    }

    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == (Is64BitTarget ? 8 : 4)) {
      // Use push / pop instead.
      unsigned Reg = isSub
        ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
      if (Reg) {
        Opc = isSub
          ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
          : (Is64BitTarget ? X86::POP64r  : X86::POP32r);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
          .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
        if (isSub)
          MI->setFlag(MachineInstr::FrameSetup);
        Offset -= ThisVal;
        continue;
      }
    }

    MachineInstr *MI = nullptr;

    if (UseLEA) {
      MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                        StackPtr, false, isSub ? -ThisVal : ThisVal);
    } else {
      MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
            .addReg(StackPtr)
            .addImm(ThisVal);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
    }

    if (isSub)
      MI->setFlag(MachineInstr::FrameSetup);

    Offset -= ThisVal;
  }
}

/// mergeSPUpdatesUp - If the instruction immediately above MBBI adjusts the
/// stack pointer (via ADD/SUB/LEA), fold its effect into *NumBytes and erase
/// it.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = nullptr) {
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = std::prev(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}

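// mergeSPUpdates - If the instruction at (or next to) MBBI adds to or
// subtracts from the stack pointer, erase it and return its effect on the
// stack pointer: positive for ADD/LEA, negative for SUB. For example, a
// merged 'subl $16, %esp' yields -16.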
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     unsigned StackPtr,
                                     bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
                                                       : std::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}

void
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            DebugLoc DL) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  // Calculate offsets.
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();

    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex =
        MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
                                                        Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
}

/// usesTheStack - This function checks if any of the users of EFLAGS
/// copy EFLAGS. We know that the code that lowers a COPY of EFLAGS has
/// to use the stack, and if we don't adjust the stack we clobber the first
/// frame index.
/// See X86InstrInfo::copyPhysReg.
static bool usesTheStack(const MachineFunction &MF) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (MachineRegisterInfo::reg_instr_iterator
       ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
       ri != re; ++ri)
    if (ri->isCopy())
      return true;

  return false;
}

void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
                                          MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          DebugLoc DL) {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  bool Is64Bit = STI.is64Bit();
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  const char *Symbol;
  if (Is64Bit) {
    if (STI.isTargetCygMing()) {
      Symbol = "___chkstk_ms";
    } else {
      Symbol = "__chkstk";
    }
  } else if (STI.isTargetCygMing())
    Symbol = "_alloca";
  else
    Symbol = "_chkstk";

  MachineInstrBuilder CI;

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
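  // For example, under the 64-bit large code model the probe call below is
  // emitted indirectly (a movabsq of the symbol's address into %r11 followed
  // by callq *%r11); all other configurations use a direct pc-relative call.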
  if (Is64Bit && IsLargeCodeModel) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(Symbol);
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
  }

  unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
  unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  if (Is64Bit) {
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
        .addReg(X86::RSP)
        .addReg(X86::RAX);
  }
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
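
// For example, calculateSetFPREG(40) returns 32 (40 rounded down to 16-byte
// alignment), and calculateSetFPREG(4096) returns 128 (clamped first).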

// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out.  Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
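// For example, with ForceStackAlign a leaf function whose locals need only
// 4-byte alignment is still given SlotSize (8-byte on x86-64) alignment,
// while a function that makes calls is raised to the full ABI StackAlign.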
static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned StackAlign = STI.getFrameLowering()->getStackAlignment();
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }
  return MaxAlign;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
              ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
       .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

void X86FrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  bool Is64Bit = STI.is64Bit();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  bool IsWin64 = STI.isCallingConvWin64(Fn->getCallingConv());
  // Not necessarily synonymous with IsWin64.
  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
  bool NeedsDwarfCFI =
      !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
  bool UseLEA = STI.useLeaForSP();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  const unsigned MachineFramePtr =
      STI.isTarget64BitILP32()
          ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
          : FramePtr;
  unsigned StackPtr = RegInfo->getStackRegister();
  unsigned BasePtr = RegInfo->getBaseRegister();
  DebugLoc DL;

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta && IsWinEH)
    report_fatal_error("Can't handle guaranteed tail call under win64 yet");

  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());

  // The default stack probe size is 4096 if the function has no stackprobesize
  // attribute.
  unsigned StackProbeSize = 4096;
  if (Fn->hasFnAttribute("stack-probe-size"))
    Fn->getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);

  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
  // function, and use up to 128 bytes of stack space, don't have a frame
  // pointer, calls, or dynamic alloca then we do not need to adjust the
  // stack pointer (we fit in the Red Zone). We also check that we don't
  // push and pop from the stack.
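  // For example, a 64-bit SysV leaf function with 96 bytes of locals and no
  // frame pointer keeps its locals entirely in the red zone and needs no
  // stack pointer adjustment at all.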
  if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
      !RegInfo->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() && // No dynamic alloca.
      !MFI->adjustsStack() &&       // No calls.
      !IsWin64 &&                   // Win64 has no Red Zone.
      !usesTheStack(MF) &&          // Don't push and pop.
      !MF.shouldSplitStack()) {     // Regular stack.
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  }

  // Insert stack pointer adjustment for later moving of return addr.  Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
              StackPtr)
        .addReg(StackPtr)
        .addImm(-TailCallReturnAddrDelta)
        .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  uint64_t NumBytes = 0;
  int stackGrowth = -SlotSize;

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for extra hidden slot for stashing base
    // pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Callee-saved registers are pushed on stack before the stack is
    // realigned.
    if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
      NumBytes = RoundUpToAlignment(NumBytes, MaxAlign);

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(MachineFramePtr, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI) {
      // Mark the place where EBP/RBP was saved.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Change the rule for the FramePtr to be an "offset" rule.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr,
                                         DwarfFramePtr, 2 * stackGrowth));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (NeedsWinEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (!IsWinEH) {
      // Update EBP with the new base value.
      BuildMI(MBB, MBBI, DL,
              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
              FramePtr)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (NeedsDwarfCFI) {
      // Mark effective beginning of when frame pointer becomes valid.
      // Define the current CFA to use the EBP/RBP register.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Mark the FramePtr as live-in in every block.
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
      I->addLiveIn(MachineFramePtr);
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    unsigned Reg = MBBI->getOperand(0).getReg();
    ++MBBI;

    if (!HasFP && NeedsDwarfCFI) {
      // Mark callee-saved push instruction.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
      StackOffset += stackGrowth;
    }

    if (NeedsWinEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
          MachineInstr::FrameSetup);
    }
  }

  // Realign stack after we pushed callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Don't do this for Win64, it needs to realign the stack after the prologue.
  if (!IsWinEH && RegInfo->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    uint64_t Val = -MaxAlign;
    MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
                StackPtr)
            .addReg(StackPtr)
            .addImm(Val)
            .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }

  // If there is a SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

  // Adjust stack pointer: ESP -= numbytes.

  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go.  The 64-bit version of
  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
  // responsible for adjusting the stack pointer.  Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
  uint64_t AlignedNumBytes = NumBytes;
  if (IsWinEH && RegInfo->needsStackRealignment(MF))
    AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
  if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
    // Check whether EAX is livein for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    if (isEAXAlive) {
      // A live-in EAX is only handled for 32-bit targets; it should never
      // occur in 64-bit mode.
      assert(!Is64Bit && "EAX is livein in x64 case!");

      // Save EAX
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
      if (isUInt<32>(NumBytes)) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      } else if (isInt<32>(NumBytes)) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    } else {
      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
      // We'll also use 4 already allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    // Save a pointer to the MI where we set AX.
    MachineBasicBlock::iterator SetRAX = MBBI;
    --SetRAX;

    // Call __chkstk, __chkstk_ms, or __alloca.
    emitStackProbeCall(MF, MBB, MBBI, DL);

    // Apply the frame setup flag to all inserted instrs.
    for (; SetRAX != MBBI; ++SetRAX)
      SetRAX->setFlag(MachineInstr::FrameSetup);

    if (isEAXAlive) {
      // Restore EAX
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MI->setFlag(MachineInstr::FrameSetup);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                 Uses64BitFramePtr, UseLEA, TII, *RegInfo);
  }

  if (NeedsWinEH && NumBytes)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);

  int SEHFrameOffset = 0;
  if (IsWinEH && HasFP) {
    SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (SEHFrameOffset)
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
                   StackPtr, false, SEHFrameOffset);
    else
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);

    if (NeedsWinEH)
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
          .addImm(FramePtr)
          .addImm(SEHFrameOffset)
          .setMIFlag(MachineInstr::FrameSetup);
  }

  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
    const MachineInstr *FrameInstr = &*MBBI;
    ++MBBI;

    if (NeedsWinEH) {
      int FI;
      if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
        if (X86::FR64RegClass.contains(Reg)) {
          int Offset = getFrameIndexOffset(MF, FI);
          Offset += SEHFrameOffset;

          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
              .addImm(Reg)
              .addImm(Offset)
              .setMIFlag(MachineInstr::FrameSetup);
        }
      }
    }
  }

  if (NeedsWinEH)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
        .setMIFlag(MachineInstr::FrameSetup);

  // Realign stack after we spilled callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Win64 requires aligning the stack after the prologue.
  if (IsWinEH && RegInfo->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    uint64_t Val = -MaxAlign;
    MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
                StackPtr)
            .addReg(StackPtr)
            .addImm(Val)
            .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF)) {
    // Update the base pointer with the current stack pointer.
    unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);
    if (X86FI->getRestoreBasePointer()) {
      // Stash value of base pointer.  Saving RSP instead of EBP shortens
      // dependence chain.
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
                   FramePtr, true, X86FI->getRestoreBasePointerOffset())
        .addReg(StackPtr)
        .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
    // Mark end of stack pointer adjustment.
    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr,
                                               -StackSize + stackGrowth));

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MBB, MBBI, DL);
  }
}

bool X86FrameLowering::canUseLEAForSPInEpilogue(
    const MachineFunction &MF) const {
  // We can't use LEA instructions for adjusting the stack pointer if this is a
  // leaf function in the Win64 ABI.  Only ADD instructions may be used to
  // deallocate the stack.
  // This means that we can use LEA for SP in two situations:
  // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
  // 2. We *have* a frame pointer which means we are permitted to use LEA.
  return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
}

/// Check whether or not the terminators of \p MBB need to read EFLAGS.
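/// For example, a conditional-branch terminator reads EFLAGS; rewriting the
/// preceding stack adjustment from LEA to ADD would clobber the flags it
/// depends on.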
static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflag that is not defined
      // by a previous terminator.
      if (!MO.isDef())
        return true;
      BreakNext = true;
    }
    if (BreakNext)
      break;
  }
  return false;
}

void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  bool Is64Bit = STI.is64Bit();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  const bool Is64BitILP32 = STI.isTarget64BitILP32();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned MachineFramePtr =
      Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
                   : FramePtr;
  unsigned StackPtr = RegInfo->getStackRegister();

  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
  bool UseLEAForSP = canUseLEAForSPInEpilogue(MF);
  // If we can use LEA for SP but we shouldn't, check that none
  // of the terminators uses the eflags. Otherwise we will insert
  // an ADD that will redefine the eflags and break the condition.
  // Alternatively, we could move the ADD, but this may not be possible
  // and is an optimization anyway.
  if (UseLEAForSP && !MF.getSubtarget<X86Subtarget>().useLeaForSP())
    UseLEAForSP = terminatorsNeedFlagsAsInput(MBB);
  // If the assert below breaks, that means we do not do the right thing
  // in canUseAsEpilogue.
  assert((UseLEAForSP || !terminatorsNeedFlagsAsInput(MBB)) &&
         "We shouldn't have allowed this insertion point");

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign = calculateMaxStackAlign(MF);
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    NumBytes = FrameSize - CSSize;

    // Callee-saved registers were pushed on stack before the stack was
    // realigned.
    if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
      NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }
  uint64_t SEHStackAllocAmt = NumBytes;

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->isTerminator())
      break;

    --MBBI;
  }
  MachineBasicBlock::iterator FirstCSPop = MBBI;

  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset ESP to point to the last callee-saved
  // slot before popping them off! The same applies when the stack was
  // realigned.
  if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
    if (RegInfo->needsStackRealignment(MF))
      MBBI = FirstCSPop;
    unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
    uint64_t LEAAmount = IsWinEH ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;

    // There are only two legal forms of epilogue:
    // - add SEHAllocationSize, %rsp
    // - lea SEHAllocationSize(%FramePtr), %rsp
    //
    // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
    // However, we may use this sequence if we have a frame pointer because the
    // effects of the prologue can safely be undone.
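    // For example, with a 64-byte SEH allocation SEHFrameOffset is also 64,
    // LEAAmount works out to zero, and the plain 'mov' below is used instead.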
    if (LEAAmount != 0) {
      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                   FramePtr, false, LEAAmount);
      --MBBI;
    } else {
      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(FramePtr);
      --MBBI;
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr,
                 UseLEAForSP, TII, *RegInfo);
    --MBBI;
  }

  // Windows unwinder will not invoke function's exception handler if IP is
  // either in prologue or in epilogue.  This behavior causes a problem when a
  // call immediately precedes an epilogue, because the return address points
  // into the epilogue.  To cope with that, we insert an epilogue marker here,
  // then replace it with a 'nop' if it ends up immediately after a CALL in the
  // final emitted code.
  if (NeedsWinEH)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));

  // Add the return addr area delta back since we are not tail calling.
  int Offset = -1 * X86FI->getTCReturnAddrDelta();
  assert(Offset >= 0 && "TCDelta should never be positive");
  if (Offset) {
    MBBI = MBB.getFirstTerminator();

    // Check for possible merge with preceding ADD instruction.
    Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
                 UseLEAForSP, TII, *RegInfo);
  }
}

int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                          int FI) const {
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Offset will hold the offset from the stack pointer at function entry to
  // the object.
  // We need to factor in additional offsets applied during the prologue to the
  // frame, base, and stack pointer depending on which is used.
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t StackSize = MFI->getStackSize();
  unsigned SlotSize = RegInfo->getSlotSize();
  bool HasFP = hasFP(MF);
  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  int64_t FPDelta = 0;

  if (IsWinEH) {
    assert(!MFI->hasCalls() || (StackSize % 16) == 8);

    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for extra hidden slot for stashing base
    // pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;
    uint64_t NumBytes = FrameSize - CSSize;

    uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (FI && FI == X86FI->getFAIndex())
      return -SEHFrameOffset;

    // FPDelta is the difference between the "traditional" FP location (the
    // old base pointer followed by the return address) and the location
    // required by the restricted Win64 prologue.
    // Add FPDelta to all offsets below that go through the frame pointer.
    FPDelta = FrameSize - SEHFrameOffset;
    assert((!MFI->hasCalls() || (FPDelta % 16) == 0) &&
           "FPDelta isn't aligned per the Win64 ABI!");
  }

  if (RegInfo->hasBasePointer(MF)) {
    assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + SlotSize + FPDelta;
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
  } else if (RegInfo->needsStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + SlotSize + FPDelta;
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
    // FIXME: Support tail calls
  } else {
    if (!HasFP)
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += SlotSize;

    // Skip the RETADDR move area
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset + FPDelta;
}

int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             unsigned &FrameReg) const {
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  // We can't calculate offset from frame pointer if the stack is realigned,
  // so enforce usage of stack/base pointer.  The base pointer is used when we
  // have dynamic allocas in addition to dynamic realignment.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else if (RegInfo->needsStackRealignment(MF))
    FrameReg = RegInfo->getStackRegister();
  else
    FrameReg = RegInfo->getFrameRegister(MF);
  return getFrameIndexOffset(MF, FI);
}

// Simplified from getFrameIndexOffset keeping only StackPointer cases
int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF,
                                                int FI) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Does not include any dynamic realign.
  const uint64_t StackSize = MFI->getStackSize();
  {
#ifndef NDEBUG
    const X86RegisterInfo *RegInfo =
        MF.getSubtarget<X86Subtarget>().getRegisterInfo();
    // Note: LLVM arranges the stack as:
    // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
    //      > "Stack Slots" (<--SP)
    // We can always address StackSlots from RSP.  We can usually (unless
    // needsStackRealignment) address CSRs from RSP, but sometimes need to
    // address them from RBP.  FixedObjects can be placed anywhere in the stack
    // frame depending on their specific requirements (i.e. we can actually
    // refer to arguments to the function which are stored in the *callers*
    // frame).  As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
    // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.

    assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");

    // We don't handle tail calls, and shouldn't be seeing them
    // either.
    int TailCallReturnAddrDelta =
        MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
    assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
#endif
  }

  // This is how the math works out:
  //
  //  %rsp grows (i.e. gets lower) left to right. Each box below is
  //  one word (eight bytes).  Obj0 is the stack slot we're trying to
  //  get to.
  //
  //    ----------------------------------
  //    | BP | Obj0 | Obj1 | ... | ObjN |
  //    ----------------------------------
  //    ^    ^      ^                   ^
  //    A    B      C                   E
  //
  // A is the incoming stack pointer.
  // (B - A) is the local area offset (-8 for x86-64) [1]
  // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
  //
  // |(E - B)| is the StackSize (absolute value, positive).  For a
  // stack that grows down, this works out to be (B - E). [3]
  //
  // E is also the value of %rsp after stack has been set up, and we
  // want (C - E) -- the value we can add to %rsp to get to Obj0.  Now
  // (C - E) == (C - A) - (B - A) + (B - E)
  //            { Using [1], [2] and [3] above }
  //         == getObjectOffset - LocalAreaOffset + StackSize
  //

  // Get the Offset from the StackPointer
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();

  return Offset + StackSize;
}

// Simplified from getFrameIndexReference keeping only StackPointer cases
int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
                                                   int FI,
                                                   unsigned &FrameReg) const {
  const X86RegisterInfo *RegInfo =
      MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");

  FrameReg = RegInfo->getStackRegister();
  return getFrameIndexOffsetFromSP(MF, FI);
}
01303 
01304 bool X86FrameLowering::assignCalleeSavedSpillSlots(
01305     MachineFunction &MF, const TargetRegisterInfo *TRI,
01306     std::vector<CalleeSavedInfo> &CSI) const {
01307   MachineFrameInfo *MFI = MF.getFrameInfo();
01308   const X86RegisterInfo *RegInfo =
01309       MF.getSubtarget<X86Subtarget>().getRegisterInfo();
01310   unsigned SlotSize = RegInfo->getSlotSize();
01311   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
01312 
01313   unsigned CalleeSavedFrameSize = 0;
01314   int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
01315 
01316   if (hasFP(MF)) {
01317     // emitPrologue always spills the frame register first.
01318     SpillSlotOffset -= SlotSize;
01319     MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
01320 
01321     // Since emitPrologue and emitEpilogue will handle spilling and restoring
01322     // of the frame register, we can delete it from the CSI list and not have
01323     // to worry about avoiding it later.
01324     unsigned FPReg = RegInfo->getFrameRegister(MF);
01325     for (unsigned i = 0; i < CSI.size(); ++i) {
01326       if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
01327         CSI.erase(CSI.begin() + i);
01328         break;
01329       }
01330     }
01331   }
01332 
01333   // Assign slots for GPRs; this increases the frame size.
01334   for (unsigned i = CSI.size(); i != 0; --i) {
01335     unsigned Reg = CSI[i - 1].getReg();
01336 
01337     if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
01338       continue;
01339 
01340     SpillSlotOffset -= SlotSize;
01341     CalleeSavedFrameSize += SlotSize;
01342 
01343     int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
01344     CSI[i - 1].setFrameIdx(SlotIndex);
01345   }
01346 
01347   X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
01348 
01349   // Assign slots for XMMs.
01350   for (unsigned i = CSI.size(); i != 0; --i) {
01351     unsigned Reg = CSI[i - 1].getReg();
01352     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
01353       continue;
01354 
01355     const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
01356     // Ensure alignment (SpillSlotOffset is negative, hence the abs()).
01357     SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
01358     // Allocate the spill slot just below the aligned offset.
01359     SpillSlotOffset -= RC->getSize();
01360     int SlotIndex =
01361         MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
01362     CSI[i - 1].setFrameIdx(SlotIndex);
01363     MFI->ensureMaxAlignment(RC->getAlignment());
01364   }
01365 
01366   return true;
01367 }
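
// A standalone sketch of the alignment step above with hypothetical numbers:
// GPR spills left SpillSlotOffset at -40 and we place one XMM register
// (16 bytes, 16-byte alignment). Not part of LLVM's API.
static void xmmSpillSlotAlignmentSketch() {
  int SpillSlotOffset = -40;
  const int Size = 16, Alignment = 16;
  // SpillSlotOffset is negative, hence the abs() when computing the
  // misalignment that must be subtracted.
  SpillSlotOffset -= std::abs(SpillSlotOffset) % Alignment; // -48, aligned
  SpillSlotOffset -= Size;                                  // -64, the slot
  assert(SpillSlotOffset == -64 && SpillSlotOffset % Alignment == 0);
}
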
01368 
01369 bool X86FrameLowering::spillCalleeSavedRegisters(
01370     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
01371     const std::vector<CalleeSavedInfo> &CSI,
01372     const TargetRegisterInfo *TRI) const {
01373   DebugLoc DL = MBB.findDebugLoc(MI);
01374 
01375   MachineFunction &MF = *MBB.getParent();
01376   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
01377   const TargetInstrInfo &TII = *STI.getInstrInfo();
01378 
01379   // Push GPRs; this increases the frame size.
01380   unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
01381   for (unsigned i = CSI.size(); i != 0; --i) {
01382     unsigned Reg = CSI[i - 1].getReg();
01383 
01384     if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
01385       continue;
01386     // Add the callee-saved register as live-in. It's killed at the spill.
01387     MBB.addLiveIn(Reg);
01388 
01389     BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
01390       .setMIFlag(MachineInstr::FrameSetup);
01391   }
01392 
01393   // Spill XMM regs. X86 has no push/pop instructions for XMM registers,
01394   // so spill them to their assigned stack slots instead.
01395   for (unsigned i = CSI.size(); i != 0; --i) {
01396     unsigned Reg = CSI[i-1].getReg();
01397     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
01398       continue;
01399     // Add the callee-saved register as live-in. It's killed at the spill.
01400     MBB.addLiveIn(Reg);
01401     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
01402 
01403     TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
01404                             TRI);
01405     --MI; // Step back onto the store just inserted by storeRegToStackSlot...
01406     MI->setFlag(MachineInstr::FrameSetup);
01407     ++MI; // ...then return to the original insertion point.
01408   }
01409 
01410   return true;
01411 }
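
// The --MI/++MI dance above tags the instruction that storeRegToStackSlot
// just inserted immediately before MI. The same pattern on a std::list, a
// hypothetical stand-in for a MachineBasicBlock (needs <list>):
static void flagJustInsertedSketch() {
  std::list<int> Insts = {1, 2, 4};
  std::list<int>::iterator MI = std::prev(Insts.end()); // points at 4
  Insts.insert(MI, 3); // insert before MI; MI stays valid
  --MI;                // step back onto the new element...
  assert(*MI == 3);    // ...this is the one to flag
  ++MI;                // restore the original position
  assert(*MI == 4);
}
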
01412 
01413 bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
01414                                                MachineBasicBlock::iterator MI,
01415                                         const std::vector<CalleeSavedInfo> &CSI,
01416                                           const TargetRegisterInfo *TRI) const {
01417   if (CSI.empty())
01418     return false;
01419 
01420   DebugLoc DL = MBB.findDebugLoc(MI);
01421 
01422   MachineFunction &MF = *MBB.getParent();
01423   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
01424   const TargetInstrInfo &TII = *STI.getInstrInfo();
01425 
01426   // Reload XMMs from stack frame.
01427   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
01428     unsigned Reg = CSI[i].getReg();
01429     if (X86::GR64RegClass.contains(Reg) ||
01430         X86::GR32RegClass.contains(Reg))
01431       continue;
01432 
01433     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
01434     TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
01435   }
01436 
01437   // POP GPRs.
01438   unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
01439   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
01440     unsigned Reg = CSI[i].getReg();
01441     if (!X86::GR64RegClass.contains(Reg) &&
01442         !X86::GR32RegClass.contains(Reg))
01443       continue;
01444 
01445     BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
01446   }
01447   return true;
01448 }
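
// A standalone sanity check that the POP loop above (forward over CSI)
// exactly mirrors the PUSH loop in spillCalleeSavedRegisters (reverse over
// CSI), as stack LIFO order requires. The register values are hypothetical;
// needs <stack> and <vector>.
static void csrLifoOrderSketch() {
  const std::vector<int> CSI = {/*EBX*/ 0, /*ESI*/ 1, /*EDI*/ 2};
  std::stack<int> HwStack;
  for (unsigned i = CSI.size(); i != 0; --i) // spill: reverse order
    HwStack.push(CSI[i - 1]);
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // restore: forward
    assert(HwStack.top() == CSI[i]);
    HwStack.pop();
  }
}
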
01449 
01450 void
01451 X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
01452                                                        RegScavenger *RS) const {
01453   MachineFrameInfo *MFI = MF.getFrameInfo();
01454   const X86RegisterInfo *RegInfo =
01455       MF.getSubtarget<X86Subtarget>().getRegisterInfo();
01456   unsigned SlotSize = RegInfo->getSlotSize();
01457 
01458   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
01459   int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
01460 
01461   if (TailCallReturnAddrDelta < 0) {
01462     // create RETURNADDR area
01463     //   arg
01464     //   arg
01465     //   RETADDR
01466     //   { ...
01467     //     RETADDR area
01468     //     ...
01469     //   }
01470     //   [EBP]
01471     MFI->CreateFixedObject(-TailCallReturnAddrDelta,
01472                            TailCallReturnAddrDelta - SlotSize, true);
01473   }
01474 
01475   // Spill the BasePtr if it's used.
01476   if (RegInfo->hasBasePointer(MF))
01477     MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
01478 }
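
// The CreateFixedObject call above, worked through with hypothetical x86-64
// numbers (SlotSize == 8, a tail-call delta of -8): the RETURNADDR area is
// 8 bytes, placed one slot below the saved return address.
static void tailCallRetAddrAreaSketch() {
  const int TailCallReturnAddrDelta = -8;
  const int SlotSize = 8;
  const int Size = -TailCallReturnAddrDelta;             // 8 bytes
  const int Offset = TailCallReturnAddrDelta - SlotSize; // -16
  assert(Size == 8 && Offset == -16);
  (void)Size; (void)Offset;
}
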
01479 
01480 static bool
01481 HasNestArgument(const MachineFunction *MF) {
01482   const Function *F = MF->getFunction();
01483   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
01484        I != E; ++I) {
01485     if (I->hasNestAttr())
01486       return true;
01487   }
01488   return false;
01489 }
01490 
01491 /// GetScratchRegister - Get a temp register for performing work in the
01492 /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
01493 /// and the properties of the function either one or two registers will be
01494 /// needed. Set primary to true for the first register, false for the second.
01495 static unsigned
01496 GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
01497   CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
01498 
01499   // The HiPE (Erlang) calling convention uses dedicated scratch registers.
01500   if (CallingConvention == CallingConv::HiPE) {
01501     if (Is64Bit)
01502       return Primary ? X86::R14 : X86::R13;
01503     else
01504       return Primary ? X86::EBX : X86::EDI;
01505   }
01506 
01507   if (Is64Bit) {
01508     if (IsLP64)
01509       return Primary ? X86::R11 : X86::R12;
01510     else
01511       return Primary ? X86::R11D : X86::R12D;
01512   }
01513 
01514   bool IsNested = HasNestArgument(&MF);
01515 
01516   if (CallingConvention == CallingConv::X86_FastCall ||
01517       CallingConvention == CallingConv::Fast) {
01518     if (IsNested)
01519       report_fatal_error("Segmented stacks do not support fastcall with "
01520                          "nested functions.");
01521     return Primary ? X86::EAX : X86::ECX;
01522   }
01523   if (IsNested)
01524     return Primary ? X86::EDX : X86::EAX;
01525   return Primary ? X86::ECX : X86::EAX;
01526 }
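
// Usage sketch for the helper above (MF is assumed to describe a 64-bit
// LP64 function with any calling convention other than HiPE): the primary
// and secondary scratch registers come back as R11 and R12.
static void scratchRegisterUsageSketch(const MachineFunction &MF) {
  unsigned Primary =
      GetScratchRegister(/*Is64Bit=*/true, /*IsLP64=*/true, MF, true);
  unsigned Secondary =
      GetScratchRegister(/*Is64Bit=*/true, /*IsLP64=*/true, MF, false);
  assert(Primary == X86::R11 && Secondary == X86::R12);
  (void)Primary; (void)Secondary;
}
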
01527 
01528 // The stack limit in the TCB is set to this many bytes above the actual stack
01529 // limit.
01530 static const uint64_t kSplitStackAvailable = 256;
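
// A standalone illustration of how this slack is used below: frames smaller
// than kSplitStackAvailable compare %rsp directly against the stored limit,
// while larger ones first compute %rsp - StackSize with an LEA.
static void splitStackCompareSketch() {
  const uint64_t kAvailable = 256; // mirrors kSplitStackAvailable above
  assert((uint64_t(128) < kAvailable) &&  // small frame: compare SP itself
         !(uint64_t(4096) < kAvailable)); // large frame: needs the LEA
}
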
01531 
01532 void X86FrameLowering::adjustForSegmentedStacks(
01533     MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
01534   MachineFrameInfo *MFI = MF.getFrameInfo();
01535   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
01536   const TargetInstrInfo &TII = *STI.getInstrInfo();
01537   uint64_t StackSize;
01538   bool Is64Bit = STI.is64Bit();
01539   const bool IsLP64 = STI.isTarget64BitLP64();
01540   unsigned TlsReg, TlsOffset;
01541   DebugLoc DL;
01542 
01543   unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
01544   assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
01545          "Scratch register is live-in");
01546 
01547   if (MF.getFunction()->isVarArg())
01548     report_fatal_error("Segmented stacks do not support vararg functions.");
01549   if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
01550       !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
01551       !STI.isTargetDragonFly())
01552     report_fatal_error("Segmented stacks not supported on this platform.");
01553 
01554   // Eventually StackSize will be calculated by a link-time pass, which will
01555   // also decide whether checking code needs to be injected into this
01556   // particular prologue.
01557   StackSize = MFI->getStackSize();
01558 
01559   // Do not generate a prologue for functions with a zero-sized stack.
01560   if (StackSize == 0)
01561     return;
01562 
01563   MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
01564   MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
01565   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
01566   bool IsNested = false;
01567 
01568   // We need to know if the function has a nest argument only in 64 bit mode.
01569   if (Is64Bit)
01570     IsNested = HasNestArgument(&MF);
01571 
01572   // The MOV R10, RAX needs to be in a different block, since the RET we
01573   // emit in allocMBB needs to be the last (terminating) instruction.
01574 
01575   for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
01576                                           e = PrologueMBB.livein_end();
01577        i != e; ++i) {
01578     allocMBB->addLiveIn(*i);
01579     checkMBB->addLiveIn(*i);
01580   }
01581 
01582   if (IsNested)
01583     allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
01584 
01585   MF.push_front(allocMBB);
01586   MF.push_front(checkMBB);
01587 
01588   // When the frame size is less than 256 we just compare the stack
01589   // boundary directly to the value of the stack pointer, per gcc.
01590   bool CompareStackPointer = StackSize < kSplitStackAvailable;
01591 
01592   // Read the limit of the current stacklet from the stack_guard TLS location.
01593   if (Is64Bit) {
01594     if (STI.isTargetLinux()) {
01595       TlsReg = X86::FS;
01596       TlsOffset = IsLP64 ? 0x70 : 0x40;
01597     } else if (STI.isTargetDarwin()) {
01598       TlsReg = X86::GS;
01599       TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
01600     } else if (STI.isTargetWin64()) {
01601       TlsReg = X86::GS;
01602       TlsOffset = 0x28; // pvArbitrary, reserved for application use
01603     } else if (STI.isTargetFreeBSD()) {
01604       TlsReg = X86::FS;
01605       TlsOffset = 0x18;
01606     } else if (STI.isTargetDragonFly()) {
01607       TlsReg = X86::FS;
01608       TlsOffset = 0x20; // use tls_tcb.tcb_segstack
01609     } else {
01610       report_fatal_error("Segmented stacks not supported on this platform.");
01611     }
01612 
01613     if (CompareStackPointer)
01614       ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
01615     else
01616       BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
01617         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
01618 
01619     BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
01620       .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
01621   } else {
01622     if (STI.isTargetLinux()) {
01623       TlsReg = X86::GS;
01624       TlsOffset = 0x30;
01625     } else if (STI.isTargetDarwin()) {
01626       TlsReg = X86::GS;
01627       TlsOffset = 0x48 + 90*4;
01628     } else if (STI.isTargetWin32()) {
01629       TlsReg = X86::FS;
01630       TlsOffset = 0x14; // pvArbitrary, reserved for application use
01631     } else if (STI.isTargetDragonFly()) {
01632       TlsReg = X86::FS;
01633       TlsOffset = 0x10; // use tls_tcb.tcb_segstack
01634     } else if (STI.isTargetFreeBSD()) {
01635       report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
01636     } else {
01637       report_fatal_error("Segmented stacks not supported on this platform.");
01638     }
01639 
01640     if (CompareStackPointer)
01641       ScratchReg = X86::ESP;
01642     else
01643       BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
01644         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
01645 
01646     if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
01647         STI.isTargetDragonFly()) {
01648       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
01649         .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
01650     } else if (STI.isTargetDarwin()) {
01651 
01652       // TlsOffset doesn't fit into a mod r/m byte, so we need an extra register.
01653       unsigned ScratchReg2;
01654       bool SaveScratch2;
01655       if (CompareStackPointer) {
01656         // The primary scratch register is available for holding the TLS offset.
01657         ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
01658         SaveScratch2 = false;
01659       } else {
01660         // Need to use a second register to hold the TLS offset
01661         ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
01662 
01663         // Unfortunately, with fastcc the second scratch register may hold an
01664         // argument.
01665         SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
01666       }
01667 
01668       // If Scratch2 is live-in then it needs to be saved.
01669       assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
01670              "Scratch register is live-in and not saved");
01671 
01672       if (SaveScratch2)
01673         BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
01674           .addReg(ScratchReg2, RegState::Kill);
01675 
01676       BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
01677         .addImm(TlsOffset);
01678       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
01679         .addReg(ScratchReg)
01680         .addReg(ScratchReg2).addImm(1).addReg(0)
01681         .addImm(0)
01682         .addReg(TlsReg);
01683 
01684       if (SaveScratch2)
01685         BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
01686     }
01687   }
01688 
01689   // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
01690   // It jumps to normal execution of the function body.
01691   BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&PrologueMBB);
01692 
01693   // On 32-bit we first push the argument size and then the frame size. On
01694   // 64-bit, we pass the stack frame size in r10 and the argument size in r11.
01695   if (Is64Bit) {
01696     // Functions with nested arguments use R10, so it needs to be saved
01697     // across the call to __morestack.
01698 
01699     const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
01700     const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
01701     const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
01702     const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
01703     const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
01704 
01705     if (IsNested)
01706       BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
01707 
01708     BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
01709       .addImm(StackSize);
01710     BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
01711       .addImm(X86FI->getArgumentStackSize());
01712     MF.getRegInfo().setPhysRegUsed(Reg10);
01713     MF.getRegInfo().setPhysRegUsed(Reg11);
01714   } else {
01715     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
01716       .addImm(X86FI->getArgumentStackSize());
01717     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
01718       .addImm(StackSize);
01719   }
01720 
01721   // __morestack is in libgcc
01722   if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
01723     // Under the large code model, we cannot assume that __morestack lives
01724     // within 2^31 bytes of the call site, so we cannot use pc-relative
01725     // addressing. We cannot perform the call via a temporary register,
01726     // as the rax register may be used to store the static chain, and all
01727     // other suitable registers may be either callee-save or used for
01728     // parameter passing. We cannot use the stack at this point either
01729     // because __morestack manipulates the stack directly.
01730     //
01731     // To avoid these issues, perform an indirect call via a read-only memory
01732     // location containing the address.
01733     //
01734     // This solution is not perfect, as it assumes that the .rodata section
01735     // is laid out within 2^31 bytes of each function body, but this seems
01736     // to be sufficient for JIT.
01737     BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
01738         .addReg(X86::RIP)
01739         .addImm(0)
01740         .addReg(0)
01741         .addExternalSymbol("__morestack_addr")
01742         .addReg(0);
01743     MF.getMMI().setUsesMorestackAddr(true);
01744   } else {
01745     if (Is64Bit)
01746       BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
01747         .addExternalSymbol("__morestack");
01748     else
01749       BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
01750         .addExternalSymbol("__morestack");
01751   }
01752 
01753   if (IsNested)
01754     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
01755   else
01756     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
01757 
01758   allocMBB->addSuccessor(&PrologueMBB);
01759 
01760   checkMBB->addSuccessor(allocMBB);
01761   checkMBB->addSuccessor(&PrologueMBB);
01762 
01763 #ifdef XDEBUG
01764   MF.verify();
01765 #endif
01766 }
01767 
01768 /// Erlang programs may need a special prologue to handle the stack size they
01769 /// might need at runtime. That is because Erlang/OTP does not implement a C
01770 /// stack but uses a custom hybrid stack/heap architecture.
01771 /// (for more information see Eric Stenman's Ph.D. thesis:
01772 /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
01773 ///
01774 /// CheckStack:
01775 ///       temp0 = sp - MaxStack
01776 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
01777 /// OldStart:
01778 ///       ...
01779 /// IncStack:
01780 ///       call inc_stack   # doubles the stack space
01781 ///       temp0 = sp - MaxStack
01782 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
01783 void X86FrameLowering::adjustForHiPEPrologue(
01784     MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
01785   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
01786   const TargetInstrInfo &TII = *STI.getInstrInfo();
01787   MachineFrameInfo *MFI = MF.getFrameInfo();
01788   const unsigned SlotSize = STI.getRegisterInfo()->getSlotSize();
01789   const bool Is64Bit = STI.is64Bit();
01790   const bool IsLP64 = STI.isTarget64BitLP64();
01791   DebugLoc DL;
01792   // HiPE-specific values
01793   const unsigned HipeLeafWords = 24;
01794   const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
01795   const unsigned Guaranteed = HipeLeafWords * SlotSize;
01796   unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
01797                             MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
01798   unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
01799 
01800   assert(STI.isTargetLinux() &&
01801          "HiPE prologue is only supported on Linux operating systems.");
01802 
01803   // Compute the largest caller frame needed to fit the callees' frames (a
01804   // worked example follows the scan below). 'MaxStack' is computed from:
01805   //
01806   // a) the fixed frame size, which is the space needed for all spilled temps,
01807   // b) outgoing on-stack parameter areas, and
01808   // c) the minimum stack space this function needs to make available for the
01809   //    functions it calls (a tunable ABI property).
01810   if (MFI->hasCalls()) {
01811     unsigned MoreStackForCalls = 0;
01812 
01813     for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
01814          MBBI != MBBE; ++MBBI)
01815       for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
01816            MI != ME; ++MI) {
01817         if (!MI->isCall())
01818           continue;
01819 
01820         // Get callee operand.
01821         const MachineOperand &MO = MI->getOperand(0);
01822 
01823         // Only take into account global function calls (no closures etc.).
01824         if (!MO.isGlobal())
01825           continue;
01826 
01827         const Function *F = dyn_cast<Function>(MO.getGlobal());
01828         if (!F)
01829           continue;
01830 
01831         // Do not update 'MaxStack' for primitive and built-in functions:
01832         // those whose names contain "erlang." or "bif_", or that contain
01833         // neither a "." (as in a plain <Module>.<Function>.<Arity> name)
01834         // nor an "_" (as in the BIF "suspend_0"). Such calls are executed
01835         // on another stack.
01836         if (F->getName().find("erlang.") != StringRef::npos ||
01837             F->getName().find("bif_") != StringRef::npos ||
01838             F->getName().find_first_of("._") == StringRef::npos)
01839           continue;
01840 
01841         unsigned CalleeStkArity =
01842           F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
01843         if (HipeLeafWords - 1 > CalleeStkArity)
01844           MoreStackForCalls = std::max(MoreStackForCalls,
01845                                (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
01846       }
01847     MaxStack += MoreStackForCalls;
01848   }
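
// The computation above, worked through with hypothetical 64-bit numbers:
// SlotSize 8, a 40-byte frame, a caller taking 8 arguments, and a single
// global callee taking 9 arguments. Not part of LLVM's API.
static void hipeMaxStackSketch() {
  const unsigned SlotSize = 8, HipeLeafWords = 24, CCRegisteredArgs = 6;
  const unsigned Guaranteed = HipeLeafWords * SlotSize;          // 192 bytes
  const unsigned CallerStkArity = 8 - CCRegisteredArgs;          // 2 on stack
  unsigned MaxStack = 40 + CallerStkArity * SlotSize + SlotSize; // 64
  const unsigned CalleeStkArity = 9 - CCRegisteredArgs;          // 3 on stack
  unsigned MoreStackForCalls = 0;
  if (HipeLeafWords - 1 > CalleeStkArity)
    MoreStackForCalls = std::max(
        MoreStackForCalls, (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
  MaxStack += MoreStackForCalls;                                 // 64 + 160
  assert(MaxStack == 224 && MaxStack > Guaranteed); // checks are emitted
  (void)Guaranteed;
}
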
01849 
01850   // If the stack frame needed is larger than the guaranteed amount, runtime
01851   // checks and calls to the "inc_stack_0" BIF are inserted into the prologue.
01852   if (MaxStack > Guaranteed) {
01853     MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
01854     MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
01855 
01856     for (MachineBasicBlock::livein_iterator I = PrologueMBB.livein_begin(),
01857                                             E = PrologueMBB.livein_end();
01858          I != E; ++I) {
01859       stackCheckMBB->addLiveIn(*I);
01860       incStackMBB->addLiveIn(*I);
01861     }
01862 
01863     MF.push_front(incStackMBB);
01864     MF.push_front(stackCheckMBB);
01865 
01866     unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
01867     unsigned LEAop, CMPop, CALLop;
01868     if (Is64Bit) {
01869       SPReg = X86::RSP;
01870       PReg  = X86::RBP;
01871       LEAop = X86::LEA64r;
01872       CMPop = X86::CMP64rm;
01873       CALLop = X86::CALL64pcrel32;
01874       SPLimitOffset = 0x90;
01875     } else {
01876       SPReg = X86::ESP;
01877       PReg  = X86::EBP;
01878       LEAop = X86::LEA32r;
01879       CMPop = X86::CMP32rm;
01880       CALLop = X86::CALLpcrel32;
01881       SPLimitOffset = 0x4c;
01882     }
01883 
01884     ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
01885     assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
01886            "HiPE prologue scratch register is live-in");
01887 
01888     // Create new MBB for StackCheck:
01889     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
01890                  SPReg, false, -MaxStack);
01891     // SPLimitOffset is in a fixed heap location (pointed to by BP).
01892     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
01893                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
01894     BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&PrologueMBB);
01895 
01896     // Create new MBB for IncStack:
01897     BuildMI(incStackMBB, DL, TII.get(CALLop))
01898         .addExternalSymbol("inc_stack_0");
01899     addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
01900                  SPReg, false, -MaxStack);
01901     addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
01902                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
01903     BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
01904 
01905     stackCheckMBB->addSuccessor(&PrologueMBB, 99);
01906     stackCheckMBB->addSuccessor(incStackMBB, 1);
01907     incStackMBB->addSuccessor(&PrologueMBB, 99);
01908     incStackMBB->addSuccessor(incStackMBB, 1);
01909   }
01910 #ifdef XDEBUG
01911   MF.verify();
01912 #endif
01913 }
01914 
01915 void X86FrameLowering::
01916 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
01917                               MachineBasicBlock::iterator I) const {
01918   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
01919   const TargetInstrInfo &TII = *STI.getInstrInfo();
01920   const X86RegisterInfo &RegInfo = *STI.getRegisterInfo();
01921   unsigned StackPtr = RegInfo.getStackRegister();
01922   bool reserveCallFrame = hasReservedCallFrame(MF);
01923   unsigned Opcode = I->getOpcode();
01924   bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
01925   bool IsLP64 = STI.isTarget64BitLP64();
01926   DebugLoc DL = I->getDebugLoc();
01927   uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
01928   uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
01929   I = MBB.erase(I);
01930 
01931   if (!reserveCallFrame) {
01932     // If the stack pointer can be changed after prologue, turn the
01933     // adjcallstackup instruction into a 'sub ESP, <amt>' and the
01934     // adjcallstackdown instruction into 'add ESP, <amt>'
01935     if (Amount == 0)
01936       return;
01937 
01938     // We need to keep the stack aligned properly.  To do this, we round the
01939     // amount of space needed for the outgoing arguments up to the next
01940     // alignment boundary.
01941     unsigned StackAlign = getStackAlignment();
01942     Amount = RoundUpToAlignment(Amount, StackAlign);
01943 
01944     MachineInstr *New = nullptr;
01945 
01946     // Factor out the amount that gets handled inside the sequence
01947     // (pushes of arguments for frame setup, callee pops for frame destroy).
01948     Amount -= InternalAmt;
01949 
01950     if (Amount) {
01951       if (Opcode == TII.getCallFrameSetupOpcode()) {
01952         New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
01953           .addReg(StackPtr).addImm(Amount);
01954       } else {
01955         assert(Opcode == TII.getCallFrameDestroyOpcode());
01956 
01957         unsigned Opc = getADDriOpcode(IsLP64, Amount);
01958         New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
01959           .addReg(StackPtr).addImm(Amount);
01960       }
01961     }
01962 
01963     if (New) {
01964       // The EFLAGS implicit def is dead.
01965       New->getOperand(3).setIsDead();
01966 
01967       // Replace the pseudo instruction with a new instruction.
01968       MBB.insert(I, New);
01969     }
01970 
01971     return;
01972   }
01973 
01974   if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
01975     // If we are performing frame pointer elimination and if the callee pops
01976     // something off the stack pointer, add it back.  We do this until we have
01977     // more advanced stack pointer tracking ability.
01978     unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
01979     MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
01980       .addReg(StackPtr).addImm(InternalAmt);
01981 
01982     // The EFLAGS implicit def is dead.
01983     New->getOperand(3).setIsDead();
01984 
01985     // We are not tracking the stack pointer adjustment by the callee, so make
01986     // sure we restore the stack pointer immediately after the call; there may
01987     // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
01988     MachineBasicBlock::iterator B = MBB.begin();
01989     while (I != B && !std::prev(I)->isCall())
01990       --I;
01991     MBB.insert(I, New);
01992   }
01993 }
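
// The Amount bookkeeping above, worked through with hypothetical numbers:
// 20 bytes of outgoing arguments, 16-byte stack alignment, and 8 bytes
// already materialized by pushes (InternalAmt). RoundUpToAlignment is
// mirrored here with plain arithmetic.
static void callFrameAmountSketch() {
  uint64_t Amount = 20;
  const uint64_t StackAlign = 16, InternalAmt = 8;
  Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; // 32
  Amount -= InternalAmt; // the pushes already cover 8 of the 32 bytes
  assert(Amount == 24 && "emit 'sub esp, 24' for this call sequence");
}
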
01994 
01995 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
01996   assert(MBB.getParent() && "Block is not attached to a function!");
01997 
01998   if (canUseLEAForSPInEpilogue(*MBB.getParent()))
01999     return true;
02000 
02001   // If we cannot use LEA to adjust SP, we may need to use ADD, which
02002   // clobbers EFLAGS. Check that none of the terminators reads EFLAGS;
02003   // if one does, conservatively assume it is not safe to insert the
02004   // epilogue here.
02005   return !terminatorsNeedFlagsAsInput(MBB);
02006 }
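
// terminatorsNeedFlagsAsInput is defined earlier in this file; the sketch
// below shows what such a scan can look like (an illustration, not the
// file's actual helper):
static bool terminatorsReadEFLAGSSketch(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators())
    if (MI.readsRegister(X86::EFLAGS))
      return true; // e.g. a JCC consuming a prior CMP/ADD result
  return false;
}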