LLVM API Documentation
00001 //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file contains the X86 implementation of TargetFrameLowering class. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "X86FrameLowering.h" 00015 #include "X86InstrBuilder.h" 00016 #include "X86InstrInfo.h" 00017 #include "X86MachineFunctionInfo.h" 00018 #include "X86Subtarget.h" 00019 #include "X86TargetMachine.h" 00020 #include "llvm/ADT/SmallSet.h" 00021 #include "llvm/CodeGen/MachineFrameInfo.h" 00022 #include "llvm/CodeGen/MachineFunction.h" 00023 #include "llvm/CodeGen/MachineInstrBuilder.h" 00024 #include "llvm/CodeGen/MachineModuleInfo.h" 00025 #include "llvm/CodeGen/MachineRegisterInfo.h" 00026 #include "llvm/IR/DataLayout.h" 00027 #include "llvm/IR/Function.h" 00028 #include "llvm/MC/MCAsmInfo.h" 00029 #include "llvm/MC/MCSymbol.h" 00030 #include "llvm/Support/CommandLine.h" 00031 #include "llvm/Target/TargetOptions.h" 00032 00033 using namespace llvm; 00034 00035 // FIXME: completely move here. 00036 extern cl::opt<bool> ForceStackAlign; 00037 00038 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 00039 return !MF.getFrameInfo()->hasVarSizedObjects(); 00040 } 00041 00042 /// hasFP - Return true if the specified function should have a dedicated frame 00043 /// pointer register. This is true if the function has variable sized allocas 00044 /// or if frame pointer elimination is disabled. 00045 bool X86FrameLowering::hasFP(const MachineFunction &MF) const { 00046 const MachineFrameInfo *MFI = MF.getFrameInfo(); 00047 const MachineModuleInfo &MMI = MF.getMMI(); 00048 const TargetRegisterInfo *RegInfo = TM.getRegisterInfo(); 00049 00050 return (MF.getTarget().Options.DisableFramePointerElim(MF) || 00051 RegInfo->needsStackRealignment(MF) || 00052 MFI->hasVarSizedObjects() || 00053 MFI->isFrameAddressTaken() || MF.hasMSInlineAsm() || 00054 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || 00055 MMI.callsUnwindInit() || MMI.callsEHReturn()); 00056 } 00057 00058 static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { 00059 if (IsLP64) { 00060 if (isInt<8>(Imm)) 00061 return X86::SUB64ri8; 00062 return X86::SUB64ri32; 00063 } else { 00064 if (isInt<8>(Imm)) 00065 return X86::SUB32ri8; 00066 return X86::SUB32ri; 00067 } 00068 } 00069 00070 static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) { 00071 if (IsLP64) { 00072 if (isInt<8>(Imm)) 00073 return X86::ADD64ri8; 00074 return X86::ADD64ri32; 00075 } else { 00076 if (isInt<8>(Imm)) 00077 return X86::ADD32ri8; 00078 return X86::ADD32ri; 00079 } 00080 } 00081 00082 static unsigned getLEArOpcode(unsigned IsLP64) { 00083 return IsLP64 ? X86::LEA64r : X86::LEA32r; 00084 } 00085 00086 /// findDeadCallerSavedReg - Return a caller-saved register that isn't live 00087 /// when it reaches the "return" instruction. We can then pop a stack object 00088 /// to this register without worry about clobbering it. 00089 static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, 00090 MachineBasicBlock::iterator &MBBI, 00091 const TargetRegisterInfo &TRI, 00092 bool Is64Bit) { 00093 const MachineFunction *MF = MBB.getParent(); 00094 const Function *F = MF->getFunction(); 00095 if (!F || MF->getMMI().callsEHReturn()) 00096 return 0; 00097 00098 static const uint16_t CallerSavedRegs32Bit[] = { 00099 X86::EAX, X86::EDX, X86::ECX, 0 00100 }; 00101 00102 static const uint16_t CallerSavedRegs64Bit[] = { 00103 X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, 00104 X86::R8, X86::R9, X86::R10, X86::R11, 0 00105 }; 00106 00107 unsigned Opc = MBBI->getOpcode(); 00108 switch (Opc) { 00109 default: return 0; 00110 case X86::RET: 00111 case X86::RETI: 00112 case X86::TCRETURNdi: 00113 case X86::TCRETURNri: 00114 case X86::TCRETURNmi: 00115 case X86::TCRETURNdi64: 00116 case X86::TCRETURNri64: 00117 case X86::TCRETURNmi64: 00118 case X86::EH_RETURN: 00119 case X86::EH_RETURN64: { 00120 SmallSet<uint16_t, 8> Uses; 00121 for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) { 00122 MachineOperand &MO = MBBI->getOperand(i); 00123 if (!MO.isReg() || MO.isDef()) 00124 continue; 00125 unsigned Reg = MO.getReg(); 00126 if (!Reg) 00127 continue; 00128 for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) 00129 Uses.insert(*AI); 00130 } 00131 00132 const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; 00133 for (; *CS; ++CS) 00134 if (!Uses.count(*CS)) 00135 return *CS; 00136 } 00137 } 00138 00139 return 0; 00140 } 00141 00142 00143 /// emitSPUpdate - Emit a series of instructions to increment / decrement the 00144 /// stack pointer by a constant value. 00145 static 00146 void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 00147 unsigned StackPtr, int64_t NumBytes, 00148 bool Is64Bit, bool IsLP64, bool UseLEA, 00149 const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { 00150 bool isSub = NumBytes < 0; 00151 uint64_t Offset = isSub ? -NumBytes : NumBytes; 00152 unsigned Opc; 00153 if (UseLEA) 00154 Opc = getLEArOpcode(IsLP64); 00155 else 00156 Opc = isSub 00157 ? getSUBriOpcode(IsLP64, Offset) 00158 : getADDriOpcode(IsLP64, Offset); 00159 00160 uint64_t Chunk = (1LL << 31) - 1; 00161 DebugLoc DL = MBB.findDebugLoc(MBBI); 00162 00163 while (Offset) { 00164 uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; 00165 if (ThisVal == (Is64Bit ? 8 : 4)) { 00166 // Use push / pop instead. 00167 unsigned Reg = isSub 00168 ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) 00169 : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); 00170 if (Reg) { 00171 Opc = isSub 00172 ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) 00173 : (Is64Bit ? X86::POP64r : X86::POP32r); 00174 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc)) 00175 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); 00176 if (isSub) 00177 MI->setFlag(MachineInstr::FrameSetup); 00178 Offset -= ThisVal; 00179 continue; 00180 } 00181 } 00182 00183 MachineInstr *MI = NULL; 00184 00185 if (UseLEA) { 00186 MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), 00187 StackPtr, false, isSub ? -ThisVal : ThisVal); 00188 } else { 00189 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 00190 .addReg(StackPtr) 00191 .addImm(ThisVal); 00192 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 00193 } 00194 00195 if (isSub) 00196 MI->setFlag(MachineInstr::FrameSetup); 00197 00198 Offset -= ThisVal; 00199 } 00200 } 00201 00202 /// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. 00203 static 00204 void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 00205 unsigned StackPtr, uint64_t *NumBytes = NULL) { 00206 if (MBBI == MBB.begin()) return; 00207 00208 MachineBasicBlock::iterator PI = prior(MBBI); 00209 unsigned Opc = PI->getOpcode(); 00210 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 00211 Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || 00212 Opc == X86::LEA32r || Opc == X86::LEA64_32r) && 00213 PI->getOperand(0).getReg() == StackPtr) { 00214 if (NumBytes) 00215 *NumBytes += PI->getOperand(2).getImm(); 00216 MBB.erase(PI); 00217 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 00218 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 00219 PI->getOperand(0).getReg() == StackPtr) { 00220 if (NumBytes) 00221 *NumBytes -= PI->getOperand(2).getImm(); 00222 MBB.erase(PI); 00223 } 00224 } 00225 00226 /// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator. 00227 static 00228 void mergeSPUpdatesDown(MachineBasicBlock &MBB, 00229 MachineBasicBlock::iterator &MBBI, 00230 unsigned StackPtr, uint64_t *NumBytes = NULL) { 00231 // FIXME: THIS ISN'T RUN!!! 00232 return; 00233 00234 if (MBBI == MBB.end()) return; 00235 00236 MachineBasicBlock::iterator NI = llvm::next(MBBI); 00237 if (NI == MBB.end()) return; 00238 00239 unsigned Opc = NI->getOpcode(); 00240 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 00241 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && 00242 NI->getOperand(0).getReg() == StackPtr) { 00243 if (NumBytes) 00244 *NumBytes -= NI->getOperand(2).getImm(); 00245 MBB.erase(NI); 00246 MBBI = NI; 00247 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 00248 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 00249 NI->getOperand(0).getReg() == StackPtr) { 00250 if (NumBytes) 00251 *NumBytes += NI->getOperand(2).getImm(); 00252 MBB.erase(NI); 00253 MBBI = NI; 00254 } 00255 } 00256 00257 /// mergeSPUpdates - Checks the instruction before/after the passed 00258 /// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and the 00259 /// stack adjustment is returned as a positive value for ADD/LEA and a negative for 00260 /// SUB. 00261 static int mergeSPUpdates(MachineBasicBlock &MBB, 00262 MachineBasicBlock::iterator &MBBI, 00263 unsigned StackPtr, 00264 bool doMergeWithPrevious) { 00265 if ((doMergeWithPrevious && MBBI == MBB.begin()) || 00266 (!doMergeWithPrevious && MBBI == MBB.end())) 00267 return 0; 00268 00269 MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; 00270 MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI); 00271 unsigned Opc = PI->getOpcode(); 00272 int Offset = 0; 00273 00274 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || 00275 Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || 00276 Opc == X86::LEA32r || Opc == X86::LEA64_32r) && 00277 PI->getOperand(0).getReg() == StackPtr){ 00278 Offset += PI->getOperand(2).getImm(); 00279 MBB.erase(PI); 00280 if (!doMergeWithPrevious) MBBI = NI; 00281 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 00282 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 00283 PI->getOperand(0).getReg() == StackPtr) { 00284 Offset -= PI->getOperand(2).getImm(); 00285 MBB.erase(PI); 00286 if (!doMergeWithPrevious) MBBI = NI; 00287 } 00288 00289 return Offset; 00290 } 00291 00292 static bool isEAXLiveIn(MachineFunction &MF) { 00293 for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), 00294 EE = MF.getRegInfo().livein_end(); II != EE; ++II) { 00295 unsigned Reg = II->first; 00296 00297 if (Reg == X86::EAX || Reg == X86::AX || 00298 Reg == X86::AH || Reg == X86::AL) 00299 return true; 00300 } 00301 00302 return false; 00303 } 00304 00305 void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, 00306 MCSymbol *Label, 00307 unsigned FramePtr) const { 00308 MachineFrameInfo *MFI = MF.getFrameInfo(); 00309 MachineModuleInfo &MMI = MF.getMMI(); 00310 const MCRegisterInfo &MRI = MMI.getContext().getRegisterInfo(); 00311 00312 // Add callee saved registers to move list. 00313 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 00314 if (CSI.empty()) return; 00315 00316 const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 00317 bool HasFP = hasFP(MF); 00318 00319 // Calculate amount of bytes used for return address storing. 00320 int stackGrowth = -RegInfo->getSlotSize(); 00321 00322 // FIXME: This is dirty hack. The code itself is pretty mess right now. 00323 // It should be rewritten from scratch and generalized sometimes. 00324 00325 // Determine maximum offset (minimum due to stack growth). 00326 int64_t MaxOffset = 0; 00327 for (std::vector<CalleeSavedInfo>::const_iterator 00328 I = CSI.begin(), E = CSI.end(); I != E; ++I) 00329 MaxOffset = std::min(MaxOffset, 00330 MFI->getObjectOffset(I->getFrameIdx())); 00331 00332 // Calculate offsets. 00333 int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; 00334 for (std::vector<CalleeSavedInfo>::const_iterator 00335 I = CSI.begin(), E = CSI.end(); I != E; ++I) { 00336 int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); 00337 unsigned Reg = I->getReg(); 00338 Offset = MaxOffset - Offset + saveAreaOffset; 00339 00340 // Don't output a new machine move if we're re-saving the frame 00341 // pointer. This happens when the PrologEpilogInserter has inserted an extra 00342 // "PUSH" of the frame pointer -- the "emitPrologue" method automatically 00343 // generates one when frame pointers are used. If we generate a "machine 00344 // move" for this extra "PUSH", the linker will lose track of the fact that 00345 // the frame pointer should have the value of the first "PUSH" when it's 00346 // trying to unwind. 00347 // 00348 // FIXME: This looks inelegant. It's possibly correct, but it's covering up 00349 // another bug. I.e., one where we generate a prolog like this: 00350 // 00351 // pushl %ebp 00352 // movl %esp, %ebp 00353 // pushl %ebp 00354 // pushl %esi 00355 // ... 00356 // 00357 // The immediate re-push of EBP is unnecessary. At the least, it's an 00358 // optimization bug. EBP can be used as a scratch register in certain 00359 // cases, but probably not when we have a frame pointer. 00360 if (HasFP && FramePtr == Reg) 00361 continue; 00362 00363 unsigned DwarfReg = MRI.getDwarfRegNum(Reg, true); 00364 MMI.addFrameInst(MCCFIInstruction::createOffset(Label, DwarfReg, Offset)); 00365 } 00366 } 00367 00368 /// getCompactUnwindRegNum - Get the compact unwind number for a given 00369 /// register. The number corresponds to the enum lists in 00370 /// compact_unwind_encoding.h. 00371 static int getCompactUnwindRegNum(unsigned Reg, bool is64Bit) { 00372 static const uint16_t CU32BitRegs[] = { 00373 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 00374 }; 00375 static const uint16_t CU64BitRegs[] = { 00376 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 00377 }; 00378 const uint16_t *CURegs = is64Bit ? CU64BitRegs : CU32BitRegs; 00379 for (int Idx = 1; *CURegs; ++CURegs, ++Idx) 00380 if (*CURegs == Reg) 00381 return Idx; 00382 00383 return -1; 00384 } 00385 00386 // Number of registers that can be saved in a compact unwind encoding. 00387 #define CU_NUM_SAVED_REGS 6 00388 00389 /// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding 00390 /// used with frameless stacks. It is passed the number of registers to be saved 00391 /// and an array of the registers saved. 00392 static uint32_t 00393 encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], 00394 unsigned RegCount, bool Is64Bit) { 00395 // The saved registers are numbered from 1 to 6. In order to encode the order 00396 // in which they were saved, we re-number them according to their place in the 00397 // register order. The re-numbering is relative to the last re-numbered 00398 // register. E.g., if we have registers {6, 2, 4, 5} saved in that order: 00399 // 00400 // Orig Re-Num 00401 // ---- ------ 00402 // 6 6 00403 // 2 2 00404 // 4 3 00405 // 5 3 00406 // 00407 for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) { 00408 int CUReg = getCompactUnwindRegNum(SavedRegs[i], Is64Bit); 00409 if (CUReg == -1) return ~0U; 00410 SavedRegs[i] = CUReg; 00411 } 00412 00413 // Reverse the list. 00414 std::swap(SavedRegs[0], SavedRegs[5]); 00415 std::swap(SavedRegs[1], SavedRegs[4]); 00416 std::swap(SavedRegs[2], SavedRegs[3]); 00417 00418 uint32_t RenumRegs[CU_NUM_SAVED_REGS]; 00419 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) { 00420 unsigned Countless = 0; 00421 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) 00422 if (SavedRegs[j] < SavedRegs[i]) 00423 ++Countless; 00424 00425 RenumRegs[i] = SavedRegs[i] - Countless - 1; 00426 } 00427 00428 // Take the renumbered values and encode them into a 10-bit number. 00429 uint32_t permutationEncoding = 0; 00430 switch (RegCount) { 00431 case 6: 00432 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] 00433 + 6 * RenumRegs[2] + 2 * RenumRegs[3] 00434 + RenumRegs[4]; 00435 break; 00436 case 5: 00437 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] 00438 + 6 * RenumRegs[3] + 2 * RenumRegs[4] 00439 + RenumRegs[5]; 00440 break; 00441 case 4: 00442 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] 00443 + 3 * RenumRegs[4] + RenumRegs[5]; 00444 break; 00445 case 3: 00446 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] 00447 + RenumRegs[5]; 00448 break; 00449 case 2: 00450 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; 00451 break; 00452 case 1: 00453 permutationEncoding |= RenumRegs[5]; 00454 break; 00455 } 00456 00457 assert((permutationEncoding & 0x3FF) == permutationEncoding && 00458 "Invalid compact register encoding!"); 00459 return permutationEncoding; 00460 } 00461 00462 /// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a 00463 /// compact encoding with a frame pointer. 00464 static uint32_t 00465 encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], 00466 bool Is64Bit) { 00467 // Encode the registers in the order they were saved, 3-bits per register. The 00468 // registers are numbered from 1 to CU_NUM_SAVED_REGS. 00469 uint32_t RegEnc = 0; 00470 for (int I = CU_NUM_SAVED_REGS - 1, Idx = 0; I != -1; --I) { 00471 unsigned Reg = SavedRegs[I]; 00472 if (Reg == 0) continue; 00473 00474 int CURegNum = getCompactUnwindRegNum(Reg, Is64Bit); 00475 if (CURegNum == -1) return ~0U; 00476 00477 // Encode the 3-bit register number in order, skipping over 3-bits for each 00478 // register. 00479 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); 00480 } 00481 00482 assert((RegEnc & 0x3FFFF) == RegEnc && "Invalid compact register encoding!"); 00483 return RegEnc; 00484 } 00485 00486 uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { 00487 const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 00488 unsigned FramePtr = RegInfo->getFrameRegister(MF); 00489 unsigned StackPtr = RegInfo->getStackRegister(); 00490 00491 bool Is64Bit = STI.is64Bit(); 00492 bool HasFP = hasFP(MF); 00493 00494 unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 }; 00495 unsigned SavedRegIdx = 0; 00496 00497 unsigned OffsetSize = (Is64Bit ? 8 : 4); 00498 00499 unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r); 00500 unsigned PushInstrSize = 1; 00501 unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr); 00502 unsigned MoveInstrSize = (Is64Bit ? 3 : 2); 00503 unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2); 00504 00505 unsigned StackDivide = (Is64Bit ? 8 : 4); 00506 00507 unsigned InstrOffset = 0; 00508 unsigned StackAdjust = 0; 00509 unsigned StackSize = 0; 00510 00511 MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB. 00512 bool ExpectEnd = false; 00513 for (MachineBasicBlock::iterator 00514 MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) { 00515 MachineInstr &MI = *MBBI; 00516 unsigned Opc = MI.getOpcode(); 00517 if (Opc == X86::PROLOG_LABEL) continue; 00518 if (!MI.getFlag(MachineInstr::FrameSetup)) break; 00519 00520 // We don't exect any more prolog instructions. 00521 if (ExpectEnd) return CU::UNWIND_MODE_DWARF; 00522 00523 if (Opc == PushInstr) { 00524 // If there are too many saved registers, we cannot use compact encoding. 00525 if (SavedRegIdx >= CU_NUM_SAVED_REGS) return CU::UNWIND_MODE_DWARF; 00526 00527 unsigned Reg = MI.getOperand(0).getReg(); 00528 if (Reg == (Is64Bit ? X86::RAX : X86::EAX)) { 00529 ExpectEnd = true; 00530 continue; 00531 } 00532 00533 SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg(); 00534 StackAdjust += OffsetSize; 00535 InstrOffset += PushInstrSize; 00536 } else if (Opc == MoveInstr) { 00537 unsigned SrcReg = MI.getOperand(1).getReg(); 00538 unsigned DstReg = MI.getOperand(0).getReg(); 00539 00540 if (DstReg != FramePtr || SrcReg != StackPtr) 00541 return CU::UNWIND_MODE_DWARF; 00542 00543 StackAdjust = 0; 00544 memset(SavedRegs, 0, sizeof(SavedRegs)); 00545 SavedRegIdx = 0; 00546 InstrOffset += MoveInstrSize; 00547 } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 00548 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) { 00549 if (StackSize) 00550 // We already have a stack size. 00551 return CU::UNWIND_MODE_DWARF; 00552 00553 if (!MI.getOperand(0).isReg() || 00554 MI.getOperand(0).getReg() != MI.getOperand(1).getReg() || 00555 MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm()) 00556 // We need this to be a stack adjustment pointer. Something like: 00557 // 00558 // %RSP<def> = SUB64ri8 %RSP, 48 00559 return CU::UNWIND_MODE_DWARF; 00560 00561 StackSize = MI.getOperand(2).getImm() / StackDivide; 00562 SubtractInstrIdx += InstrOffset; 00563 ExpectEnd = true; 00564 } 00565 } 00566 00567 // Encode that we are using EBP/RBP as the frame pointer. 00568 uint32_t CompactUnwindEncoding = 0; 00569 StackAdjust /= StackDivide; 00570 if (HasFP) { 00571 if ((StackAdjust & 0xFF) != StackAdjust) 00572 // Offset was too big for compact encoding. 00573 return CU::UNWIND_MODE_DWARF; 00574 00575 // Get the encoding of the saved registers when we have a frame pointer. 00576 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit); 00577 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 00578 00579 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; 00580 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; 00581 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; 00582 } else { 00583 ++StackAdjust; 00584 uint32_t TotalStackSize = StackAdjust + StackSize; 00585 if ((TotalStackSize & 0xFF) == TotalStackSize) { 00586 // Frameless stack with a small stack size. 00587 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; 00588 00589 // Encode the stack size. 00590 CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16; 00591 } else { 00592 if ((StackAdjust & 0x7) != StackAdjust) 00593 // The extra stack adjustments are too big for us to handle. 00594 return CU::UNWIND_MODE_DWARF; 00595 00596 // Frameless stack with an offset too large for us to encode compactly. 00597 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; 00598 00599 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' 00600 // instruction. 00601 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; 00602 00603 // Encode any extra stack stack adjustments (done via push instructions). 00604 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; 00605 } 00606 00607 // Encode the number of registers saved. 00608 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; 00609 00610 // Get the encoding of the saved registers when we don't have a frame 00611 // pointer. 00612 uint32_t RegEnc = 00613 encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx, 00614 Is64Bit); 00615 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 00616 00617 // Encode the register encoding. 00618 CompactUnwindEncoding |= 00619 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; 00620 } 00621 00622 return CompactUnwindEncoding; 00623 } 00624 00625 /// usesTheStack - This function checks if any of the users of EFLAGS 00626 /// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has 00627 /// to use the stack, and if we don't adjust the stack we clobber the first 00628 /// frame index. 00629 /// See X86InstrInfo::copyPhysReg. 00630 static bool usesTheStack(MachineFunction &MF) { 00631 MachineRegisterInfo &MRI = MF.getRegInfo(); 00632 00633 for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS), 00634 re = MRI.reg_end(); ri != re; ++ri) 00635 if (ri->isCopy()) 00636 return true; 00637 00638 return false; 00639 } 00640 00641 /// emitPrologue - Push callee-saved registers onto the stack, which 00642 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate 00643 /// space for local variables. Also emit labels used by the exception handler to 00644 /// generate the exception handling frames. 00645 void X86FrameLowering::emitPrologue(MachineFunction &MF) const { 00646 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. 00647 MachineBasicBlock::iterator MBBI = MBB.begin(); 00648 MachineFrameInfo *MFI = MF.getFrameInfo(); 00649 const Function *Fn = MF.getFunction(); 00650 const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 00651 const X86InstrInfo &TII = *TM.getInstrInfo(); 00652 MachineModuleInfo &MMI = MF.getMMI(); 00653 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 00654 bool needsFrameMoves = MMI.hasDebugInfo() || 00655 Fn->needsUnwindTableEntry(); 00656 uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. 00657 uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. 00658 bool HasFP = hasFP(MF); 00659 bool Is64Bit = STI.is64Bit(); 00660 bool IsLP64 = STI.isTarget64BitLP64(); 00661 bool IsWin64 = STI.isTargetWin64(); 00662 bool UseLEA = STI.useLeaForSP(); 00663 unsigned StackAlign = getStackAlignment(); 00664 unsigned SlotSize = RegInfo->getSlotSize(); 00665 unsigned FramePtr = RegInfo->getFrameRegister(MF); 00666 unsigned StackPtr = RegInfo->getStackRegister(); 00667 unsigned BasePtr = RegInfo->getBaseRegister(); 00668 DebugLoc DL; 00669 00670 // If we're forcing a stack realignment we can't rely on just the frame 00671 // info, we need to know the ABI stack alignment as well in case we 00672 // have a call out. Otherwise just make sure we have some alignment - we'll 00673 // go with the minimum SlotSize. 00674 if (ForceStackAlign) { 00675 if (MFI->hasCalls()) 00676 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; 00677 else if (MaxAlign < SlotSize) 00678 MaxAlign = SlotSize; 00679 } 00680 00681 // Add RETADDR move area to callee saved frame size. 00682 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 00683 if (TailCallReturnAddrDelta < 0) 00684 X86FI->setCalleeSavedFrameSize( 00685 X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); 00686 00687 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf 00688 // function, and use up to 128 bytes of stack space, don't have a frame 00689 // pointer, calls, or dynamic alloca then we do not need to adjust the 00690 // stack pointer (we fit in the Red Zone). We also check that we don't 00691 // push and pop from the stack. 00692 if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, 00693 Attribute::NoRedZone) && 00694 !RegInfo->needsStackRealignment(MF) && 00695 !MFI->hasVarSizedObjects() && // No dynamic alloca. 00696 !MFI->adjustsStack() && // No calls. 00697 !IsWin64 && // Win64 has no Red Zone 00698 !usesTheStack(MF) && // Don't push and pop. 00699 !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack 00700 uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); 00701 if (HasFP) MinSize += SlotSize; 00702 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); 00703 MFI->setStackSize(StackSize); 00704 } 00705 00706 // Insert stack pointer adjustment for later moving of return addr. Only 00707 // applies to tail call optimized functions where the callee argument stack 00708 // size is bigger than the callers. 00709 if (TailCallReturnAddrDelta < 0) { 00710 MachineInstr *MI = 00711 BuildMI(MBB, MBBI, DL, 00712 TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)), 00713 StackPtr) 00714 .addReg(StackPtr) 00715 .addImm(-TailCallReturnAddrDelta) 00716 .setMIFlag(MachineInstr::FrameSetup); 00717 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 00718 } 00719 00720 // Mapping for machine moves: 00721 // 00722 // DST: VirtualFP AND 00723 // SRC: VirtualFP => DW_CFA_def_cfa_offset 00724 // ELSE => DW_CFA_def_cfa 00725 // 00726 // SRC: VirtualFP AND 00727 // DST: Register => DW_CFA_def_cfa_register 00728 // 00729 // ELSE 00730 // OFFSET < 0 => DW_CFA_offset_extended_sf 00731 // REG < 64 => DW_CFA_offset + Reg 00732 // ELSE => DW_CFA_offset_extended 00733 00734 uint64_t NumBytes = 0; 00735 int stackGrowth = -SlotSize; 00736 00737 if (HasFP) { 00738 // Calculate required stack adjustment. 00739 uint64_t FrameSize = StackSize - SlotSize; 00740 if (RegInfo->needsStackRealignment(MF)) { 00741 // Callee-saved registers are pushed on stack before the stack 00742 // is realigned. 00743 FrameSize -= X86FI->getCalleeSavedFrameSize(); 00744 NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; 00745 } else { 00746 NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); 00747 } 00748 00749 // Get the offset of the stack slot for the EBP register, which is 00750 // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. 00751 // Update the frame offset adjustment. 00752 MFI->setOffsetAdjustment(-NumBytes); 00753 00754 // Save EBP/RBP into the appropriate stack slot. 00755 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) 00756 .addReg(FramePtr, RegState::Kill) 00757 .setMIFlag(MachineInstr::FrameSetup); 00758 00759 if (needsFrameMoves) { 00760 // Mark the place where EBP/RBP was saved. 00761 MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); 00762 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) 00763 .addSym(FrameLabel); 00764 00765 // Define the current CFA rule to use the provided offset. 00766 assert(StackSize); 00767 MMI.addFrameInst( 00768 MCCFIInstruction::createDefCfaOffset(FrameLabel, 2 * stackGrowth)); 00769 00770 // Change the rule for the FramePtr to be an "offset" rule. 00771 unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); 00772 MMI.addFrameInst(MCCFIInstruction::createOffset(FrameLabel, DwarfFramePtr, 00773 2 * stackGrowth)); 00774 } 00775 00776 // Update EBP with the new base value. 00777 BuildMI(MBB, MBBI, DL, 00778 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) 00779 .addReg(StackPtr) 00780 .setMIFlag(MachineInstr::FrameSetup); 00781 00782 if (needsFrameMoves) { 00783 // Mark effective beginning of when frame pointer becomes valid. 00784 MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); 00785 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) 00786 .addSym(FrameLabel); 00787 00788 // Define the current CFA to use the EBP/RBP register. 00789 unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); 00790 MMI.addFrameInst( 00791 MCCFIInstruction::createDefCfaRegister(FrameLabel, DwarfFramePtr)); 00792 } 00793 00794 // Mark the FramePtr as live-in in every block except the entry. 00795 for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); 00796 I != E; ++I) 00797 I->addLiveIn(FramePtr); 00798 } else { 00799 NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); 00800 } 00801 00802 // Skip the callee-saved push instructions. 00803 bool PushedRegs = false; 00804 int StackOffset = 2 * stackGrowth; 00805 00806 while (MBBI != MBB.end() && 00807 (MBBI->getOpcode() == X86::PUSH32r || 00808 MBBI->getOpcode() == X86::PUSH64r)) { 00809 PushedRegs = true; 00810 MBBI->setFlag(MachineInstr::FrameSetup); 00811 ++MBBI; 00812 00813 if (!HasFP && needsFrameMoves) { 00814 // Mark callee-saved push instruction. 00815 MCSymbol *Label = MMI.getContext().CreateTempSymbol(); 00816 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); 00817 00818 // Define the current CFA rule to use the provided offset. 00819 assert(StackSize); 00820 MMI.addFrameInst( 00821 MCCFIInstruction::createDefCfaOffset(Label, StackOffset)); 00822 StackOffset += stackGrowth; 00823 } 00824 } 00825 00826 // Realign stack after we pushed callee-saved registers (so that we'll be 00827 // able to calculate their offsets from the frame pointer). 00828 00829 // NOTE: We push the registers before realigning the stack, so 00830 // vector callee-saved (xmm) registers may be saved w/o proper 00831 // alignment in this way. However, currently these regs are saved in 00832 // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so 00833 // this shouldn't be a problem. 00834 if (RegInfo->needsStackRealignment(MF)) { 00835 assert(HasFP && "There should be a frame pointer if stack is realigned."); 00836 MachineInstr *MI = 00837 BuildMI(MBB, MBBI, DL, 00838 TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr) 00839 .addReg(StackPtr) 00840 .addImm(-MaxAlign) 00841 .setMIFlag(MachineInstr::FrameSetup); 00842 00843 // The EFLAGS implicit def is dead. 00844 MI->getOperand(3).setIsDead(); 00845 } 00846 00847 // If there is an SUB32ri of ESP immediately before this instruction, merge 00848 // the two. This can be the case when tail call elimination is enabled and 00849 // the callee has more arguments then the caller. 00850 NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); 00851 00852 // If there is an ADD32ri or SUB32ri of ESP immediately after this 00853 // instruction, merge the two instructions. 00854 mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); 00855 00856 // Adjust stack pointer: ESP -= numbytes. 00857 00858 // Windows and cygwin/mingw require a prologue helper routine when allocating 00859 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw 00860 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the 00861 // stack and adjust the stack pointer in one go. The 64-bit version of 00862 // __chkstk is only responsible for probing the stack. The 64-bit prologue is 00863 // responsible for adjusting the stack pointer. Touching the stack at 4K 00864 // increments is necessary to ensure that the guard pages used by the OS 00865 // virtual memory manager are allocated in correct sequence. 00866 if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) { 00867 const char *StackProbeSymbol; 00868 bool isSPUpdateNeeded = false; 00869 00870 if (Is64Bit) { 00871 if (STI.isTargetCygMing()) 00872 StackProbeSymbol = "___chkstk"; 00873 else { 00874 StackProbeSymbol = "__chkstk"; 00875 isSPUpdateNeeded = true; 00876 } 00877 } else if (STI.isTargetCygMing()) 00878 StackProbeSymbol = "_alloca"; 00879 else 00880 StackProbeSymbol = "_chkstk"; 00881 00882 // Check whether EAX is livein for this function. 00883 bool isEAXAlive = isEAXLiveIn(MF); 00884 00885 if (isEAXAlive) { 00886 // Sanity check that EAX is not livein for this function. 00887 // It should not be, so throw an assert. 00888 assert(!Is64Bit && "EAX is livein in x64 case!"); 00889 00890 // Save EAX 00891 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) 00892 .addReg(X86::EAX, RegState::Kill) 00893 .setMIFlag(MachineInstr::FrameSetup); 00894 } 00895 00896 if (Is64Bit) { 00897 // Handle the 64-bit Windows ABI case where we need to call __chkstk. 00898 // Function prologue is responsible for adjusting the stack pointer. 00899 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) 00900 .addImm(NumBytes) 00901 .setMIFlag(MachineInstr::FrameSetup); 00902 } else { 00903 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. 00904 // We'll also use 4 already allocated bytes for EAX. 00905 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) 00906 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) 00907 .setMIFlag(MachineInstr::FrameSetup); 00908 } 00909 00910 BuildMI(MBB, MBBI, DL, 00911 TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32)) 00912 .addExternalSymbol(StackProbeSymbol) 00913 .addReg(StackPtr, RegState::Define | RegState::Implicit) 00914 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) 00915 .setMIFlag(MachineInstr::FrameSetup); 00916 00917 // MSVC x64's __chkstk needs to adjust %rsp. 00918 // FIXME: %rax preserves the offset and should be available. 00919 if (isSPUpdateNeeded) 00920 emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, 00921 UseLEA, TII, *RegInfo); 00922 00923 if (isEAXAlive) { 00924 // Restore EAX 00925 MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), 00926 X86::EAX), 00927 StackPtr, false, NumBytes - 4); 00928 MI->setFlag(MachineInstr::FrameSetup); 00929 MBB.insert(MBBI, MI); 00930 } 00931 } else if (NumBytes) 00932 emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, 00933 UseLEA, TII, *RegInfo); 00934 00935 // If we need a base pointer, set it up here. It's whatever the value 00936 // of the stack pointer is at this point. Any variable size objects 00937 // will be allocated after this, so we can still use the base pointer 00938 // to reference locals. 00939 if (RegInfo->hasBasePointer(MF)) { 00940 // Update the frame pointer with the current stack pointer. 00941 unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr; 00942 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) 00943 .addReg(StackPtr) 00944 .setMIFlag(MachineInstr::FrameSetup); 00945 } 00946 00947 if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { 00948 // Mark end of stack pointer adjustment. 00949 MCSymbol *Label = MMI.getContext().CreateTempSymbol(); 00950 BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) 00951 .addSym(Label); 00952 00953 if (!HasFP && NumBytes) { 00954 // Define the current CFA rule to use the provided offset. 00955 assert(StackSize); 00956 MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset( 00957 Label, -StackSize + stackGrowth)); 00958 } 00959 00960 // Emit DWARF info specifying the offsets of the callee-saved registers. 00961 if (PushedRegs) 00962 emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); 00963 } 00964 00965 // Darwin 10.7 and greater has support for compact unwind encoding. 00966 if (STI.getTargetTriple().isMacOSX() && 00967 !STI.getTargetTriple().isMacOSXVersionLT(10, 7)) 00968 MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF)); 00969 } 00970 00971 void X86FrameLowering::emitEpilogue(MachineFunction &MF, 00972 MachineBasicBlock &MBB) const { 00973 const MachineFrameInfo *MFI = MF.getFrameInfo(); 00974 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 00975 const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 00976 const X86InstrInfo &TII = *TM.getInstrInfo(); 00977 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 00978 assert(MBBI != MBB.end() && "Returning block has no instructions"); 00979 unsigned RetOpcode = MBBI->getOpcode(); 00980 DebugLoc DL = MBBI->getDebugLoc(); 00981 bool Is64Bit = STI.is64Bit(); 00982 bool IsLP64 = STI.isTarget64BitLP64(); 00983 bool UseLEA = STI.useLeaForSP(); 00984 unsigned StackAlign = getStackAlignment(); 00985 unsigned SlotSize = RegInfo->getSlotSize(); 00986 unsigned FramePtr = RegInfo->getFrameRegister(MF); 00987 unsigned StackPtr = RegInfo->getStackRegister(); 00988 00989 switch (RetOpcode) { 00990 default: 00991 llvm_unreachable("Can only insert epilog into returning blocks"); 00992 case X86::RET: 00993 case X86::RETI: 00994 case X86::TCRETURNdi: 00995 case X86::TCRETURNri: 00996 case X86::TCRETURNmi: 00997 case X86::TCRETURNdi64: 00998 case X86::TCRETURNri64: 00999 case X86::TCRETURNmi64: 01000 case X86::EH_RETURN: 01001 case X86::EH_RETURN64: 01002 break; // These are ok 01003 } 01004 01005 // Get the number of bytes to allocate from the FrameInfo. 01006 uint64_t StackSize = MFI->getStackSize(); 01007 uint64_t MaxAlign = MFI->getMaxAlignment(); 01008 unsigned CSSize = X86FI->getCalleeSavedFrameSize(); 01009 uint64_t NumBytes = 0; 01010 01011 // If we're forcing a stack realignment we can't rely on just the frame 01012 // info, we need to know the ABI stack alignment as well in case we 01013 // have a call out. Otherwise just make sure we have some alignment - we'll 01014 // go with the minimum. 01015 if (ForceStackAlign) { 01016 if (MFI->hasCalls()) 01017 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; 01018 else 01019 MaxAlign = MaxAlign ? MaxAlign : 4; 01020 } 01021 01022 if (hasFP(MF)) { 01023 // Calculate required stack adjustment. 01024 uint64_t FrameSize = StackSize - SlotSize; 01025 if (RegInfo->needsStackRealignment(MF)) { 01026 // Callee-saved registers were pushed on stack before the stack 01027 // was realigned. 01028 FrameSize -= CSSize; 01029 NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; 01030 } else { 01031 NumBytes = FrameSize - CSSize; 01032 } 01033 01034 // Pop EBP. 01035 BuildMI(MBB, MBBI, DL, 01036 TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); 01037 } else { 01038 NumBytes = StackSize - CSSize; 01039 } 01040 01041 // Skip the callee-saved pop instructions. 01042 while (MBBI != MBB.begin()) { 01043 MachineBasicBlock::iterator PI = prior(MBBI); 01044 unsigned Opc = PI->getOpcode(); 01045 01046 if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE && 01047 !PI->isTerminator()) 01048 break; 01049 01050 --MBBI; 01051 } 01052 MachineBasicBlock::iterator FirstCSPop = MBBI; 01053 01054 DL = MBBI->getDebugLoc(); 01055 01056 // If there is an ADD32ri or SUB32ri of ESP immediately before this 01057 // instruction, merge the two instructions. 01058 if (NumBytes || MFI->hasVarSizedObjects()) 01059 mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); 01060 01061 // If dynamic alloca is used, then reset esp to point to the last callee-saved 01062 // slot before popping them off! Same applies for the case, when stack was 01063 // realigned. 01064 if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) { 01065 if (RegInfo->needsStackRealignment(MF)) 01066 MBBI = FirstCSPop; 01067 if (CSSize != 0) { 01068 unsigned Opc = getLEArOpcode(IsLP64); 01069 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), 01070 FramePtr, false, -CSSize); 01071 } else { 01072 unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr); 01073 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 01074 .addReg(FramePtr); 01075 } 01076 } else if (NumBytes) { 01077 // Adjust stack pointer back: ESP += numbytes. 01078 emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA, 01079 TII, *RegInfo); 01080 } 01081 01082 // We're returning from function via eh_return. 01083 if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) { 01084 MBBI = MBB.getLastNonDebugInstr(); 01085 MachineOperand &DestAddr = MBBI->getOperand(0); 01086 assert(DestAddr.isReg() && "Offset should be in register!"); 01087 BuildMI(MBB, MBBI, DL, 01088 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), 01089 StackPtr).addReg(DestAddr.getReg()); 01090 } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || 01091 RetOpcode == X86::TCRETURNmi || 01092 RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || 01093 RetOpcode == X86::TCRETURNmi64) { 01094 bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; 01095 // Tail call return: adjust the stack pointer and jump to callee. 01096 MBBI = MBB.getLastNonDebugInstr(); 01097 MachineOperand &JumpTarget = MBBI->getOperand(0); 01098 MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); 01099 assert(StackAdjust.isImm() && "Expecting immediate value."); 01100 01101 // Adjust stack pointer. 01102 int StackAdj = StackAdjust.getImm(); 01103 int MaxTCDelta = X86FI->getTCReturnAddrDelta(); 01104 int Offset = 0; 01105 assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); 01106 01107 // Incoporate the retaddr area. 01108 Offset = StackAdj-MaxTCDelta; 01109 assert(Offset >= 0 && "Offset should never be negative"); 01110 01111 if (Offset) { 01112 // Check for possible merge with preceding ADD instruction. 01113 Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); 01114 emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64, 01115 UseLEA, TII, *RegInfo); 01116 } 01117 01118 // Jump to label or value in register. 01119 if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { 01120 MachineInstrBuilder MIB = 01121 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) 01122 ? X86::TAILJMPd : X86::TAILJMPd64)); 01123 if (JumpTarget.isGlobal()) 01124 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), 01125 JumpTarget.getTargetFlags()); 01126 else { 01127 assert(JumpTarget.isSymbol()); 01128 MIB.addExternalSymbol(JumpTarget.getSymbolName(), 01129 JumpTarget.getTargetFlags()); 01130 } 01131 } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) { 01132 MachineInstrBuilder MIB = 01133 BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi) 01134 ? X86::TAILJMPm : X86::TAILJMPm64)); 01135 for (unsigned i = 0; i != 5; ++i) 01136 MIB.addOperand(MBBI->getOperand(i)); 01137 } else if (RetOpcode == X86::TCRETURNri64) { 01138 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). 01139 addReg(JumpTarget.getReg(), RegState::Kill); 01140 } else { 01141 BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). 01142 addReg(JumpTarget.getReg(), RegState::Kill); 01143 } 01144 01145 MachineInstr *NewMI = prior(MBBI); 01146 NewMI->copyImplicitOps(MF, MBBI); 01147 01148 // Delete the pseudo instruction TCRETURN. 01149 MBB.erase(MBBI); 01150 } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) && 01151 (X86FI->getTCReturnAddrDelta() < 0)) { 01152 // Add the return addr area delta back since we are not tail calling. 01153 int delta = -1*X86FI->getTCReturnAddrDelta(); 01154 MBBI = MBB.getLastNonDebugInstr(); 01155 01156 // Check for possible merge with preceding ADD instruction. 01157 delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); 01158 emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII, 01159 *RegInfo); 01160 } 01161 } 01162 01163 int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { 01164 const X86RegisterInfo *RegInfo = 01165 static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); 01166 const MachineFrameInfo *MFI = MF.getFrameInfo(); 01167 int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); 01168 uint64_t StackSize = MFI->getStackSize(); 01169 01170 if (RegInfo->hasBasePointer(MF)) { 01171 assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!"); 01172 if (FI < 0) { 01173 // Skip the saved EBP. 01174 return Offset + RegInfo->getSlotSize(); 01175 } else { 01176 assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); 01177 return Offset + StackSize; 01178 } 01179 } else if (RegInfo->needsStackRealignment(MF)) { 01180 if (FI < 0) { 01181 // Skip the saved EBP. 01182 return Offset + RegInfo->getSlotSize(); 01183 } else { 01184 assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); 01185 return Offset + StackSize; 01186 } 01187 // FIXME: Support tail calls 01188 } else { 01189 if (!hasFP(MF)) 01190 return Offset + StackSize; 01191 01192 // Skip the saved EBP. 01193 Offset += RegInfo->getSlotSize(); 01194 01195 // Skip the RETADDR move area 01196 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 01197 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 01198 if (TailCallReturnAddrDelta < 0) 01199 Offset -= TailCallReturnAddrDelta; 01200 } 01201 01202 return Offset; 01203 } 01204 01205 int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, 01206 unsigned &FrameReg) const { 01207 const X86RegisterInfo *RegInfo = 01208 static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); 01209 // We can't calculate offset from frame pointer if the stack is realigned, 01210 // so enforce usage of stack/base pointer. The base pointer is used when we 01211 // have dynamic allocas in addition to dynamic realignment. 01212 if (RegInfo->hasBasePointer(MF)) 01213 FrameReg = RegInfo->getBaseRegister(); 01214 else if (RegInfo->needsStackRealignment(MF)) 01215 FrameReg = RegInfo->getStackRegister(); 01216 else 01217 FrameReg = RegInfo->getFrameRegister(MF); 01218 return getFrameIndexOffset(MF, FI); 01219 } 01220 01221 bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 01222 MachineBasicBlock::iterator MI, 01223 const std::vector<CalleeSavedInfo> &CSI, 01224 const TargetRegisterInfo *TRI) const { 01225 if (CSI.empty()) 01226 return false; 01227 01228 DebugLoc DL = MBB.findDebugLoc(MI); 01229 01230 MachineFunction &MF = *MBB.getParent(); 01231 01232 unsigned SlotSize = STI.is64Bit() ? 8 : 4; 01233 unsigned FPReg = TRI->getFrameRegister(MF); 01234 unsigned CalleeFrameSize = 0; 01235 01236 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 01237 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 01238 01239 // Push GPRs. It increases frame size. 01240 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; 01241 for (unsigned i = CSI.size(); i != 0; --i) { 01242 unsigned Reg = CSI[i-1].getReg(); 01243 if (!X86::GR64RegClass.contains(Reg) && 01244 !X86::GR32RegClass.contains(Reg)) 01245 continue; 01246 // Add the callee-saved register as live-in. It's killed at the spill. 01247 MBB.addLiveIn(Reg); 01248 if (Reg == FPReg) 01249 // X86RegisterInfo::emitPrologue will handle spilling of frame register. 01250 continue; 01251 CalleeFrameSize += SlotSize; 01252 BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill) 01253 .setMIFlag(MachineInstr::FrameSetup); 01254 } 01255 01256 X86FI->setCalleeSavedFrameSize(CalleeFrameSize); 01257 01258 // Make XMM regs spilled. X86 does not have ability of push/pop XMM. 01259 // It can be done by spilling XMMs to stack frame. 01260 // Note that only Win64 ABI might spill XMMs. 01261 for (unsigned i = CSI.size(); i != 0; --i) { 01262 unsigned Reg = CSI[i-1].getReg(); 01263 if (X86::GR64RegClass.contains(Reg) || 01264 X86::GR32RegClass.contains(Reg)) 01265 continue; 01266 // Add the callee-saved register as live-in. It's killed at the spill. 01267 MBB.addLiveIn(Reg); 01268 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 01269 TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), 01270 RC, TRI); 01271 } 01272 01273 return true; 01274 } 01275 01276 bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 01277 MachineBasicBlock::iterator MI, 01278 const std::vector<CalleeSavedInfo> &CSI, 01279 const TargetRegisterInfo *TRI) const { 01280 if (CSI.empty()) 01281 return false; 01282 01283 DebugLoc DL = MBB.findDebugLoc(MI); 01284 01285 MachineFunction &MF = *MBB.getParent(); 01286 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 01287 01288 // Reload XMMs from stack frame. 01289 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 01290 unsigned Reg = CSI[i].getReg(); 01291 if (X86::GR64RegClass.contains(Reg) || 01292 X86::GR32RegClass.contains(Reg)) 01293 continue; 01294 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 01295 TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), 01296 RC, TRI); 01297 } 01298 01299 // POP GPRs. 01300 unsigned FPReg = TRI->getFrameRegister(MF); 01301 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; 01302 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 01303 unsigned Reg = CSI[i].getReg(); 01304 if (!X86::GR64RegClass.contains(Reg) && 01305 !X86::GR32RegClass.contains(Reg)) 01306 continue; 01307 if (Reg == FPReg) 01308 // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 01309 continue; 01310 BuildMI(MBB, MI, DL, TII.get(Opc), Reg); 01311 } 01312 return true; 01313 } 01314 01315 void 01316 X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, 01317 RegScavenger *RS) const { 01318 MachineFrameInfo *MFI = MF.getFrameInfo(); 01319 const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 01320 unsigned SlotSize = RegInfo->getSlotSize(); 01321 01322 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 01323 int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 01324 01325 if (TailCallReturnAddrDelta < 0) { 01326 // create RETURNADDR area 01327 // arg 01328 // arg 01329 // RETADDR 01330 // { ... 01331 // RETADDR area 01332 // ... 01333 // } 01334 // [EBP] 01335 MFI->CreateFixedObject(-TailCallReturnAddrDelta, 01336 (-1U*SlotSize)+TailCallReturnAddrDelta, true); 01337 } 01338 01339 if (hasFP(MF)) { 01340 assert((TailCallReturnAddrDelta <= 0) && 01341 "The Delta should always be zero or negative"); 01342 const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); 01343 01344 // Create a frame entry for the EBP register that must be saved. 01345 int FrameIdx = MFI->CreateFixedObject(SlotSize, 01346 -(int)SlotSize + 01347 TFI.getOffsetOfLocalArea() + 01348 TailCallReturnAddrDelta, 01349 true); 01350 assert(FrameIdx == MFI->getObjectIndexBegin() && 01351 "Slot for EBP register must be last in order to be found!"); 01352 (void)FrameIdx; 01353 } 01354 01355 // Spill the BasePtr if it's used. 01356 if (RegInfo->hasBasePointer(MF)) 01357 MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); 01358 } 01359 01360 static bool 01361 HasNestArgument(const MachineFunction *MF) { 01362 const Function *F = MF->getFunction(); 01363 for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); 01364 I != E; I++) { 01365 if (I->hasNestAttr()) 01366 return true; 01367 } 01368 return false; 01369 } 01370 01371 /// GetScratchRegister - Get a temp register for performing work in the 01372 /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform 01373 /// and the properties of the function either one or two registers will be 01374 /// needed. Set primary to true for the first register, false for the second. 01375 static unsigned 01376 GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) { 01377 CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); 01378 01379 // Erlang stuff. 01380 if (CallingConvention == CallingConv::HiPE) { 01381 if (Is64Bit) 01382 return Primary ? X86::R14 : X86::R13; 01383 else 01384 return Primary ? X86::EBX : X86::EDI; 01385 } 01386 01387 if (Is64Bit) 01388 return Primary ? X86::R11 : X86::R12; 01389 01390 bool IsNested = HasNestArgument(&MF); 01391 01392 if (CallingConvention == CallingConv::X86_FastCall || 01393 CallingConvention == CallingConv::Fast) { 01394 if (IsNested) 01395 report_fatal_error("Segmented stacks does not support fastcall with " 01396 "nested function."); 01397 return Primary ? X86::EAX : X86::ECX; 01398 } 01399 if (IsNested) 01400 return Primary ? X86::EDX : X86::EAX; 01401 return Primary ? X86::ECX : X86::EAX; 01402 } 01403 01404 // The stack limit in the TCB is set to this many bytes above the actual stack 01405 // limit. 01406 static const uint64_t kSplitStackAvailable = 256; 01407 01408 void 01409 X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { 01410 MachineBasicBlock &prologueMBB = MF.front(); 01411 MachineFrameInfo *MFI = MF.getFrameInfo(); 01412 const X86InstrInfo &TII = *TM.getInstrInfo(); 01413 uint64_t StackSize; 01414 bool Is64Bit = STI.is64Bit(); 01415 unsigned TlsReg, TlsOffset; 01416 DebugLoc DL; 01417 01418 unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true); 01419 assert(!MF.getRegInfo().isLiveIn(ScratchReg) && 01420 "Scratch register is live-in"); 01421 01422 if (MF.getFunction()->isVarArg()) 01423 report_fatal_error("Segmented stacks do not support vararg functions."); 01424 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && 01425 !STI.isTargetWin32() && !STI.isTargetFreeBSD()) 01426 report_fatal_error("Segmented stacks not supported on this platform."); 01427 01428 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); 01429 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); 01430 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 01431 bool IsNested = false; 01432 01433 // We need to know if the function has a nest argument only in 64 bit mode. 01434 if (Is64Bit) 01435 IsNested = HasNestArgument(&MF); 01436 01437 // The MOV R10, RAX needs to be in a different block, since the RET we emit in 01438 // allocMBB needs to be last (terminating) instruction. 01439 01440 for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), 01441 e = prologueMBB.livein_end(); i != e; i++) { 01442 allocMBB->addLiveIn(*i); 01443 checkMBB->addLiveIn(*i); 01444 } 01445 01446 if (IsNested) 01447 allocMBB->addLiveIn(X86::R10); 01448 01449 MF.push_front(allocMBB); 01450 MF.push_front(checkMBB); 01451 01452 // Eventually StackSize will be calculated by a link-time pass; which will 01453 // also decide whether checking code needs to be injected into this particular 01454 // prologue. 01455 StackSize = MFI->getStackSize(); 01456 01457 // When the frame size is less than 256 we just compare the stack 01458 // boundary directly to the value of the stack pointer, per gcc. 01459 bool CompareStackPointer = StackSize < kSplitStackAvailable; 01460 01461 // Read the limit off the current stacklet off the stack_guard location. 01462 if (Is64Bit) { 01463 if (STI.isTargetLinux()) { 01464 TlsReg = X86::FS; 01465 TlsOffset = 0x70; 01466 } else if (STI.isTargetDarwin()) { 01467 TlsReg = X86::GS; 01468 TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90. 01469 } else if (STI.isTargetFreeBSD()) { 01470 TlsReg = X86::FS; 01471 TlsOffset = 0x18; 01472 } else { 01473 report_fatal_error("Segmented stacks not supported on this platform."); 01474 } 01475 01476 if (CompareStackPointer) 01477 ScratchReg = X86::RSP; 01478 else 01479 BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP) 01480 .addImm(1).addReg(0).addImm(-StackSize).addReg(0); 01481 01482 BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg) 01483 .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg); 01484 } else { 01485 if (STI.isTargetLinux()) { 01486 TlsReg = X86::GS; 01487 TlsOffset = 0x30; 01488 } else if (STI.isTargetDarwin()) { 01489 TlsReg = X86::GS; 01490 TlsOffset = 0x48 + 90*4; 01491 } else if (STI.isTargetWin32()) { 01492 TlsReg = X86::FS; 01493 TlsOffset = 0x14; // pvArbitrary, reserved for application use 01494 } else if (STI.isTargetFreeBSD()) { 01495 report_fatal_error("Segmented stacks not supported on FreeBSD i386."); 01496 } else { 01497 report_fatal_error("Segmented stacks not supported on this platform."); 01498 } 01499 01500 if (CompareStackPointer) 01501 ScratchReg = X86::ESP; 01502 else 01503 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) 01504 .addImm(1).addReg(0).addImm(-StackSize).addReg(0); 01505 01506 if (STI.isTargetLinux() || STI.isTargetWin32()) { 01507 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) 01508 .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); 01509 } else if (STI.isTargetDarwin()) { 01510 01511 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register 01512 unsigned ScratchReg2; 01513 bool SaveScratch2; 01514 if (CompareStackPointer) { 01515 // The primary scratch register is available for holding the TLS offset 01516 ScratchReg2 = GetScratchRegister(Is64Bit, MF, true); 01517 SaveScratch2 = false; 01518 } else { 01519 // Need to use a second register to hold the TLS offset 01520 ScratchReg2 = GetScratchRegister(Is64Bit, MF, false); 01521 01522 // Unfortunately, with fastcc the second scratch register may hold an arg 01523 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2); 01524 } 01525 01526 // If Scratch2 is live-in then it needs to be saved 01527 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) && 01528 "Scratch register is live-in and not saved"); 01529 01530 if (SaveScratch2) 01531 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r)) 01532 .addReg(ScratchReg2, RegState::Kill); 01533 01534 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2) 01535 .addImm(TlsOffset); 01536 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)) 01537 .addReg(ScratchReg) 01538 .addReg(ScratchReg2).addImm(1).addReg(0) 01539 .addImm(0) 01540 .addReg(TlsReg); 01541 01542 if (SaveScratch2) 01543 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2); 01544 } 01545 } 01546 01547 // This jump is taken if SP >= (Stacklet Limit + Stack Space required). 01548 // It jumps to normal execution of the function body. 01549 BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB); 01550 01551 // On 32 bit we first push the arguments size and then the frame size. On 64 01552 // bit, we pass the stack frame size in r10 and the argument size in r11. 01553 if (Is64Bit) { 01554 // Functions with nested arguments use R10, so it needs to be saved across 01555 // the call to _morestack 01556 01557 if (IsNested) 01558 BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10); 01559 01560 BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10) 01561 .addImm(StackSize); 01562 BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11) 01563 .addImm(X86FI->getArgumentStackSize()); 01564 MF.getRegInfo().setPhysRegUsed(X86::R10); 01565 MF.getRegInfo().setPhysRegUsed(X86::R11); 01566 } else { 01567 BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) 01568 .addImm(X86FI->getArgumentStackSize()); 01569 BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) 01570 .addImm(StackSize); 01571 } 01572 01573 // __morestack is in libgcc 01574 if (Is64Bit) 01575 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32)) 01576 .addExternalSymbol("__morestack"); 01577 else 01578 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32)) 01579 .addExternalSymbol("__morestack"); 01580 01581 if (IsNested) 01582 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10)); 01583 else 01584 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET)); 01585 01586 allocMBB->addSuccessor(&prologueMBB); 01587 01588 checkMBB->addSuccessor(allocMBB); 01589 checkMBB->addSuccessor(&prologueMBB); 01590 01591 #ifdef XDEBUG 01592 MF.verify(); 01593 #endif 01594 } 01595 01596 /// Erlang programs may need a special prologue to handle the stack size they 01597 /// might need at runtime. That is because Erlang/OTP does not implement a C 01598 /// stack but uses a custom implementation of hybrid stack/heap architecture. 01599 /// (for more information see Eric Stenman's Ph.D. thesis: 01600 /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) 01601 /// 01602 /// CheckStack: 01603 /// temp0 = sp - MaxStack 01604 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart 01605 /// OldStart: 01606 /// ... 01607 /// IncStack: 01608 /// call inc_stack # doubles the stack space 01609 /// temp0 = sp - MaxStack 01610 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart 01611 void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { 01612 const X86InstrInfo &TII = *TM.getInstrInfo(); 01613 MachineFrameInfo *MFI = MF.getFrameInfo(); 01614 const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize(); 01615 const bool Is64Bit = STI.is64Bit(); 01616 DebugLoc DL; 01617 // HiPE-specific values 01618 const unsigned HipeLeafWords = 24; 01619 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; 01620 const unsigned Guaranteed = HipeLeafWords * SlotSize; 01621 unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ? 01622 MF.getFunction()->arg_size() - CCRegisteredArgs : 0; 01623 unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize; 01624 01625 assert(STI.isTargetLinux() && 01626 "HiPE prologue is only supported on Linux operating systems."); 01627 01628 // Compute the largest caller's frame that is needed to fit the callees' 01629 // frames. This 'MaxStack' is computed from: 01630 // 01631 // a) the fixed frame size, which is the space needed for all spilled temps, 01632 // b) outgoing on-stack parameter areas, and 01633 // c) the minimum stack space this function needs to make available for the 01634 // functions it calls (a tunable ABI property). 01635 if (MFI->hasCalls()) { 01636 unsigned MoreStackForCalls = 0; 01637 01638 for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end(); 01639 MBBI != MBBE; ++MBBI) 01640 for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end(); 01641 MI != ME; ++MI) { 01642 if (!MI->isCall()) 01643 continue; 01644 01645 // Get callee operand. 01646 const MachineOperand &MO = MI->getOperand(0); 01647 01648 // Only take account of global function calls (no closures etc.). 01649 if (!MO.isGlobal()) 01650 continue; 01651 01652 const Function *F = dyn_cast<Function>(MO.getGlobal()); 01653 if (!F) 01654 continue; 01655 01656 // Do not update 'MaxStack' for primitive and built-in functions 01657 // (encoded with names either starting with "erlang."/"bif_" or not 01658 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an 01659 // "_", such as the BIF "suspend_0") as they are executed on another 01660 // stack. 01661 if (F->getName().find("erlang.") != StringRef::npos || 01662 F->getName().find("bif_") != StringRef::npos || 01663 F->getName().find_first_of("._") == StringRef::npos) 01664 continue; 01665 01666 unsigned CalleeStkArity = 01667 F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0; 01668 if (HipeLeafWords - 1 > CalleeStkArity) 01669 MoreStackForCalls = std::max(MoreStackForCalls, 01670 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); 01671 } 01672 MaxStack += MoreStackForCalls; 01673 } 01674 01675 // If the stack frame needed is larger than the guaranteed then runtime checks 01676 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue. 01677 if (MaxStack > Guaranteed) { 01678 MachineBasicBlock &prologueMBB = MF.front(); 01679 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); 01680 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); 01681 01682 for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(), 01683 E = prologueMBB.livein_end(); I != E; I++) { 01684 stackCheckMBB->addLiveIn(*I); 01685 incStackMBB->addLiveIn(*I); 01686 } 01687 01688 MF.push_front(incStackMBB); 01689 MF.push_front(stackCheckMBB); 01690 01691 unsigned ScratchReg, SPReg, PReg, SPLimitOffset; 01692 unsigned LEAop, CMPop, CALLop; 01693 if (Is64Bit) { 01694 SPReg = X86::RSP; 01695 PReg = X86::RBP; 01696 LEAop = X86::LEA64r; 01697 CMPop = X86::CMP64rm; 01698 CALLop = X86::CALL64pcrel32; 01699 SPLimitOffset = 0x90; 01700 } else { 01701 SPReg = X86::ESP; 01702 PReg = X86::EBP; 01703 LEAop = X86::LEA32r; 01704 CMPop = X86::CMP32rm; 01705 CALLop = X86::CALLpcrel32; 01706 SPLimitOffset = 0x4c; 01707 } 01708 01709 ScratchReg = GetScratchRegister(Is64Bit, MF, true); 01710 assert(!MF.getRegInfo().isLiveIn(ScratchReg) && 01711 "HiPE prologue scratch register is live-in"); 01712 01713 // Create new MBB for StackCheck: 01714 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), 01715 SPReg, false, -MaxStack); 01716 // SPLimitOffset is in a fixed heap location (pointed by BP). 01717 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)) 01718 .addReg(ScratchReg), PReg, false, SPLimitOffset); 01719 BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB); 01720 01721 // Create new MBB for IncStack: 01722 BuildMI(incStackMBB, DL, TII.get(CALLop)). 01723 addExternalSymbol("inc_stack_0"); 01724 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), 01725 SPReg, false, -MaxStack); 01726 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)) 01727 .addReg(ScratchReg), PReg, false, SPLimitOffset); 01728 BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB); 01729 01730 stackCheckMBB->addSuccessor(&prologueMBB, 99); 01731 stackCheckMBB->addSuccessor(incStackMBB, 1); 01732 incStackMBB->addSuccessor(&prologueMBB, 99); 01733 incStackMBB->addSuccessor(incStackMBB, 1); 01734 } 01735 #ifdef XDEBUG 01736 MF.verify(); 01737 #endif 01738 } 01739 01740 void X86FrameLowering:: 01741 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 01742 MachineBasicBlock::iterator I) const { 01743 const X86InstrInfo &TII = *TM.getInstrInfo(); 01744 const X86RegisterInfo &RegInfo = *TM.getRegisterInfo(); 01745 unsigned StackPtr = RegInfo.getStackRegister(); 01746 bool reseveCallFrame = hasReservedCallFrame(MF); 01747 int Opcode = I->getOpcode(); 01748 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); 01749 bool IsLP64 = STI.isTarget64BitLP64(); 01750 DebugLoc DL = I->getDebugLoc(); 01751 uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; 01752 uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; 01753 I = MBB.erase(I); 01754 01755 if (!reseveCallFrame) { 01756 // If the stack pointer can be changed after prologue, turn the 01757 // adjcallstackup instruction into a 'sub ESP, <amt>' and the 01758 // adjcallstackdown instruction into 'add ESP, <amt>' 01759 // TODO: consider using push / pop instead of sub + store / add 01760 if (Amount == 0) 01761 return; 01762 01763 // We need to keep the stack aligned properly. To do this, we round the 01764 // amount of space needed for the outgoing arguments up to the next 01765 // alignment boundary. 01766 unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); 01767 Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; 01768 01769 MachineInstr *New = 0; 01770 if (Opcode == TII.getCallFrameSetupOpcode()) { 01771 New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), 01772 StackPtr) 01773 .addReg(StackPtr) 01774 .addImm(Amount); 01775 } else { 01776 assert(Opcode == TII.getCallFrameDestroyOpcode()); 01777 01778 // Factor out the amount the callee already popped. 01779 Amount -= CalleeAmt; 01780 01781 if (Amount) { 01782 unsigned Opc = getADDriOpcode(IsLP64, Amount); 01783 New = BuildMI(MF, DL, TII.get(Opc), StackPtr) 01784 .addReg(StackPtr).addImm(Amount); 01785 } 01786 } 01787 01788 if (New) { 01789 // The EFLAGS implicit def is dead. 01790 New->getOperand(3).setIsDead(); 01791 01792 // Replace the pseudo instruction with a new instruction. 01793 MBB.insert(I, New); 01794 } 01795 01796 return; 01797 } 01798 01799 if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { 01800 // If we are performing frame pointer elimination and if the callee pops 01801 // something off the stack pointer, add it back. We do this until we have 01802 // more advanced stack pointer tracking ability. 01803 unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt); 01804 MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) 01805 .addReg(StackPtr).addImm(CalleeAmt); 01806 01807 // The EFLAGS implicit def is dead. 01808 New->getOperand(3).setIsDead(); 01809 01810 // We are not tracking the stack pointer adjustment by the callee, so make 01811 // sure we restore the stack pointer immediately after the call, there may 01812 // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. 01813 MachineBasicBlock::iterator B = MBB.begin(); 01814 while (I != B && !llvm::prior(I)->isCall()) 01815 --I; 01816 MBB.insert(I, New); 01817 } 01818 } 01819