1//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86FrameLowering.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/SmallSet.h"
21#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/Function.h"
32#include "llvm/MC/MCAsmInfo.h"
34#include "llvm/MC/MCSymbol.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/LEB128.h"
38#include <cstdlib>
39
40#define DEBUG_TYPE "x86-fl"
41
42STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
43STATISTIC(NumFrameExtraProbe,
44 "Number of extra stack probes generated in prologue");
45
46using namespace llvm;
47
48 X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
49 MaybeAlign StackAlignOverride)
50 : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
51 STI.is64Bit() ? -8 : -4),
52 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
53 // Cache a bunch of frame-related predicates for this subtarget.
54 SlotSize = TRI->getSlotSize();
55 Is64Bit = STI.is64Bit();
56 IsLP64 = STI.isTarget64BitLP64();
57 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
58 Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
59 StackPtr = TRI->getStackRegister();
60 }
61
62 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
63 return !MF.getFrameInfo().hasVarSizedObjects() &&
64 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
65 !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
66}
67
68/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
69/// call frame pseudos can be simplified. Having a FP, as in the default
70/// implementation, is not sufficient here since we can't always use it.
71/// Use a more nuanced condition.
72bool
73 X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
74 return hasReservedCallFrame(MF) ||
75 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
76 (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
77 TRI->hasBasePointer(MF);
78 }
79
80// needsFrameIndexResolution - Do we need to perform FI resolution for
81// this function. Normally, this is required only when the function
82// has any stack objects. However, FI resolution actually has another job,
83// not apparent from the title - it resolves callframesetup/destroy
84// that were not simplified earlier.
85// So, this is required for x86 functions that have push sequences even
86// when there are no stack objects.
87bool
88 X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
89 return MF.getFrameInfo().hasStackObjects() ||
90 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
91}
92
93/// hasFP - Return true if the specified function should have a dedicated frame
94/// pointer register. This is true if the function has variable sized allocas
95/// or if frame pointer elimination is disabled.
96 bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
97 const MachineFrameInfo &MFI = MF.getFrameInfo();
98 return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
99 TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
100 MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
101 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
102 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
103 MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
104 MFI.hasStackMap() || MFI.hasPatchPoint() ||
105 (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
106}
107
108static unsigned getSUBriOpcode(bool IsLP64) {
109 return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
110}
111
112static unsigned getADDriOpcode(bool IsLP64) {
113 return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
114}
115
116static unsigned getSUBrrOpcode(bool IsLP64) {
117 return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
118}
119
120static unsigned getADDrrOpcode(bool IsLP64) {
121 return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
122}
123
124static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
125 return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
126}
127
128static unsigned getLEArOpcode(bool IsLP64) {
129 return IsLP64 ? X86::LEA64r : X86::LEA32r;
130}
131
132static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
133 if (Use64BitReg) {
134 if (isUInt<32>(Imm))
135 return X86::MOV32ri64;
136 if (isInt<32>(Imm))
137 return X86::MOV64ri32;
138 return X86::MOV64ri;
139 }
140 return X86::MOV32ri;
141}
142
143 static bool isEAXLiveIn(MachineBasicBlock &MBB) {
144 for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
145 unsigned Reg = RegMask.PhysReg;
146
147 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
148 Reg == X86::AH || Reg == X86::AL)
149 return true;
150 }
151
152 return false;
153}
154
155/// Check if the flags need to be preserved before the terminators.
156/// This would be the case, if the eflags is live-in of the region
157/// composed by the terminators or live-out of that region, without
158/// being defined by a terminator.
159static bool
160 flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
161 for (const MachineInstr &MI : MBB.terminators()) {
162 bool BreakNext = false;
163 for (const MachineOperand &MO : MI.operands()) {
164 if (!MO.isReg())
165 continue;
166 Register Reg = MO.getReg();
167 if (Reg != X86::EFLAGS)
168 continue;
169
170 // This terminator needs an eflags that is not defined
171 // by a previous terminator:
172 // EFLAGS is live-in of the region composed by the terminators.
173 if (!MO.isDef())
174 return true;
175 // This terminator defines the eflags, i.e., we don't need to preserve it.
176 // However, we still need to check this specific terminator does not
177 // read a live-in value.
178 BreakNext = true;
179 }
180 // We found a definition of the eflags, no need to preserve them.
181 if (BreakNext)
182 return false;
183 }
184
185 // None of the terminators use or define the eflags.
186 // Check if they are live-out, that would imply we need to preserve them.
187 for (const MachineBasicBlock *Succ : MBB.successors())
188 if (Succ->isLiveIn(X86::EFLAGS))
189 return true;
190
191 return false;
192}
193
194/// emitSPUpdate - Emit a series of instructions to increment / decrement the
195/// stack pointer by a constant value.
196 void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
197 MachineBasicBlock::iterator &MBBI,
198 const DebugLoc &DL,
199 int64_t NumBytes, bool InEpilogue) const {
200 bool isSub = NumBytes < 0;
201 uint64_t Offset = isSub ? -NumBytes : NumBytes;
202 MachineInstr::MIFlag Flag =
203 isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;
204
205 uint64_t Chunk = (1LL << 31) - 1;
206
207 MachineFunction &MF = *MBB.getParent();
208 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
209 const X86TargetLowering &TLI = *STI.getTargetLowering();
210 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
211
212 // It's ok to not take into account large chunks when probing, as the
213 // allocation is split in smaller chunks anyway.
214 if (EmitInlineStackProbe && !InEpilogue) {
215
216 // This pseudo-instruction is going to be expanded, potentially using a
217 // loop, by inlineStackProbe().
218 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
219 return;
220 } else if (Offset > Chunk) {
221 // Rather than emit a long series of instructions for large offsets,
222 // load the offset into a register and do one sub/add
223 unsigned Reg = 0;
224 unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
225
226 if (isSub && !isEAXLiveIn(MBB))
227 Reg = Rax;
228 else
229 Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);
230
231 unsigned AddSubRROpc =
232 isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
233 if (Reg) {
234 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
235 .addImm(Offset)
236 .setMIFlag(Flag);
237 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
238 .addReg(StackPtr)
239 .addReg(Reg);
240 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
241 return;
242 } else if (Offset > 8 * Chunk) {
243 // If we would need more than 8 add or sub instructions (a >16GB stack
244 // frame), it's worth spilling RAX to materialize this immediate.
245 // pushq %rax
246 // movabsq +-$Offset+-SlotSize, %rax
247 // addq %rsp, %rax
248 // xchg %rax, (%rsp)
249 // movq (%rsp), %rsp
250 assert(Is64Bit && "can't have 32-bit 16GB stack frame");
251 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
252 .addReg(Rax, RegState::Kill)
253 .setMIFlag(Flag);
254 // Subtract is not commutative, so negate the offset and always use add.
255 // Subtract 8 less and add 8 more to account for the PUSH we just did.
256 if (isSub)
257 Offset = -(Offset - SlotSize);
258 else
259 Offset = Offset + SlotSize;
260 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
261 .addImm(Offset)
262 .setMIFlag(Flag);
263 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
264 .addReg(Rax)
265 .addReg(StackPtr);
266 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
267 // Exchange the new SP in RAX with the top of the stack.
268 addRegOffset(
269 BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
270 StackPtr, false, 0);
271 // Load new SP from the top of the stack into RSP.
272 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
273 StackPtr, false, 0);
274 return;
275 }
276 }
277
278 while (Offset) {
279 uint64_t ThisVal = std::min(Offset, Chunk);
280 if (ThisVal == SlotSize) {
281 // Use push / pop for slot sized adjustments as a size optimization. We
282 // need to find a dead register when using pop.
283 unsigned Reg = isSub
284 ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
285 : TRI->findDeadCallerSavedReg(MBB, MBBI);
286 if (Reg) {
287 unsigned Opc = isSub
288 ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
289 : (Is64Bit ? X86::POP64r : X86::POP32r);
290 BuildMI(MBB, MBBI, DL, TII.get(Opc))
291 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
292 .setMIFlag(Flag);
293 Offset -= ThisVal;
294 continue;
295 }
296 }
297
298 BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
299 .setMIFlag(Flag);
300
301 Offset -= ThisVal;
302 }
303}
304
305 MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
306 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
307 const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
308 assert(Offset != 0 && "zero offset stack adjustment requested");
309
310 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
311 // is tricky.
312 bool UseLEA;
313 if (!InEpilogue) {
314 // Check if inserting the prologue at the beginning
315 // of MBB would require to use LEA operations.
316 // We need to use LEA operations if EFLAGS is live in, because
317 // it means an instruction will read it before it gets defined.
318 UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
319 } else {
320 // If we can use LEA for SP but we shouldn't, check that none
321 // of the terminators uses the eflags. Otherwise we will insert
322 // a ADD that will redefine the eflags and break the condition.
323 // Alternatively, we could move the ADD, but this may not be possible
324 // and is an optimization anyway.
325 UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
326 if (UseLEA && !STI.useLeaForSP())
327 UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
328 // If that assert breaks, that means we do not do the right thing
329 // in canUseAsEpilogue.
330 assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
331 "We shouldn't have allowed this insertion point");
332 }
333
334 MachineInstrBuilder MI;
335 if (UseLEA) {
336 MI = addRegOffset(BuildMI(MBB, MBBI, DL,
337 TII.get(getLEArOpcode(Uses64BitFramePtr)),
338 StackPtr),
339 StackPtr, false, Offset);
340 } else {
341 bool IsSub = Offset < 0;
342 uint64_t AbsOffset = IsSub ? -Offset : Offset;
343 const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
344 : getADDriOpcode(Uses64BitFramePtr);
345 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
346 .addReg(StackPtr)
347 .addImm(AbsOffset);
348 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
349 }
350 return MI;
351}
352
353 int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
354 MachineBasicBlock::iterator &MBBI,
355 bool doMergeWithPrevious) const {
356 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
357 (!doMergeWithPrevious && MBBI == MBB.end()))
358 return 0;
359
360 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
361
363 // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
364 // instruction, and that there are no DBG_VALUE or other instructions between
365 // ADD/SUB/LEA and its corresponding CFI instruction.
366 /* TODO: Add support for the case where there are multiple CFI instructions
367 below the ADD/SUB/LEA, e.g.:
368 ...
369 add
370 cfi_def_cfa_offset
371 cfi_offset
372 ...
373 */
374 if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
375 PI = std::prev(PI);
376
377 unsigned Opc = PI->getOpcode();
378 int Offset = 0;
379
380 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
381 PI->getOperand(0).getReg() == StackPtr) {
382 assert(PI->getOperand(1).getReg() == StackPtr);
383 Offset = PI->getOperand(2).getImm();
384 } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
385 PI->getOperand(0).getReg() == StackPtr &&
386 PI->getOperand(1).getReg() == StackPtr &&
387 PI->getOperand(2).getImm() == 1 &&
388 PI->getOperand(3).getReg() == X86::NoRegister &&
389 PI->getOperand(5).getReg() == X86::NoRegister) {
390 // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
391 Offset = PI->getOperand(4).getImm();
392 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
393 PI->getOperand(0).getReg() == StackPtr) {
394 assert(PI->getOperand(1).getReg() == StackPtr);
395 Offset = -PI->getOperand(2).getImm();
396 } else
397 return 0;
398
399 PI = MBB.erase(PI);
400 if (PI != MBB.end() && PI->isCFIInstruction()) {
401 auto CIs = MBB.getParent()->getFrameInstructions();
402 MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
403 if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
404 CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
405 PI = MBB.erase(PI);
406 }
407 if (!doMergeWithPrevious)
408 MBBI = skipDebugInstructionsForward(PI, MBB.end());
409
410 return Offset;
411}
412
413 void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
414 MachineBasicBlock::iterator MBBI,
415 const DebugLoc &DL,
416 const MCCFIInstruction &CFIInst,
417 MachineInstr::MIFlag Flag) const {
418 MachineFunction &MF = *MBB.getParent();
419 unsigned CFIIndex = MF.addFrameInst(CFIInst);
420
421 if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
422 MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
423
424 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
425 .addCFIIndex(CFIIndex)
426 .setMIFlag(Flag);
427}
428
429/// Emits Dwarf Info specifying offsets of callee saved registers and
430/// frame pointer. This is called only when basic block sections are enabled.
431 void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
432 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
433 const MachineFunction &MF = *MBB.getParent();
434 if (!hasFP(MF)) {
435 emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
436 return;
437 }
438 const MachineModuleInfo &MMI = MF.getMMI();
439 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
440 const Register FramePtr = TRI->getFrameRegister(MF);
441 const Register MachineFramePtr =
442 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
443 : FramePtr;
444 unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
445 // Offset = space for return address + size of the frame pointer itself.
446 unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
447 BuildCFI(MBB, MBBI, DebugLoc{},
448 MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
449 emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
450 }
451
452 void X86FrameLowering::emitCalleeSavedFrameMoves(
453 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
454 const DebugLoc &DL, bool IsPrologue) const {
455 MachineFunction &MF = *MBB.getParent();
456 MachineFrameInfo &MFI = MF.getFrameInfo();
457 MachineModuleInfo &MMI = MF.getMMI();
458 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
459 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
460
461 // Add callee saved registers to move list.
462 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
463
464 // Calculate offsets.
465 for (const CalleeSavedInfo &I : CSI) {
466 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
467 Register Reg = I.getReg();
468 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
469
470 if (IsPrologue) {
471 if (X86FI->getStackPtrSaveMI()) {
472 // +2*SlotSize because there is return address and ebp at the bottom
473 // of the stack.
474 // | retaddr |
475 // | ebp |
476 // | |<--ebp
477 Offset += 2 * SlotSize;
478 SmallString<64> CfaExpr;
479 CfaExpr.push_back(dwarf::DW_CFA_expression);
480 uint8_t buffer[16];
481 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
482 CfaExpr.push_back(2);
484 const Register MachineFramePtr =
487 : FramePtr;
488 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
489 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
490 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
492 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
494 } else {
496 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
497 }
498 } else {
500 MCCFIInstruction::createRestore(nullptr, DwarfReg));
501 }
502 }
503 if (auto *MI = X86FI->getStackPtrSaveMI()) {
504 int FI = MI->getOperand(1).getIndex();
505 int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
506 SmallString<64> CfaExpr;
508 const Register MachineFramePtr =
511 : FramePtr;
512 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
513 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
514 uint8_t buffer[16];
515 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
516 CfaExpr.push_back(dwarf::DW_OP_deref);
517
518 SmallString<64> DefCfaExpr;
519 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
520 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
521 DefCfaExpr.append(CfaExpr.str());
522 // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
524 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
526 }
527}
528
529void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
530 MachineBasicBlock &MBB) const {
531 const MachineFunction &MF = *MBB.getParent();
532
533 // Insertion point.
534 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
535
536 // Fake a debug loc.
537 DebugLoc DL;
538 if (MBBI != MBB.end())
539 DL = MBBI->getDebugLoc();
540
541 // Zero out FP stack if referenced. Do this outside of the loop below so that
542 // it's done only once.
543 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
544 for (MCRegister Reg : RegsToZero.set_bits()) {
545 if (!X86::RFP80RegClass.contains(Reg))
546 continue;
547
548 unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
549 for (unsigned i = 0; i != NumFPRegs; ++i)
550 BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));
551
552 for (unsigned i = 0; i != NumFPRegs; ++i)
553 BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
554 break;
555 }
556
557 // For GPRs, we only care to clear out the 32-bit register.
558 BitVector GPRsToZero(TRI->getNumRegs());
559 for (MCRegister Reg : RegsToZero.set_bits())
560 if (TRI->isGeneralPurposeRegister(MF, Reg)) {
561 GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
562 RegsToZero.reset(Reg);
563 }
564
565 for (MCRegister Reg : GPRsToZero.set_bits())
566 BuildMI(MBB, MBBI, DL, TII.get(X86::XOR32rr), Reg)
568 .addReg(Reg, RegState::Undef);
569
570 // Zero out registers.
571 for (MCRegister Reg : RegsToZero.set_bits()) {
572 if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
573 // FIXME: Ignore MMX registers?
574 continue;
575
576 unsigned XorOp;
577 if (X86::VR128RegClass.contains(Reg)) {
578 // XMM#
579 if (!ST.hasSSE1())
580 continue;
581 XorOp = X86::PXORrr;
582 } else if (X86::VR256RegClass.contains(Reg)) {
583 // YMM#
584 if (!ST.hasAVX())
585 continue;
586 XorOp = X86::VPXORrr;
587 } else if (X86::VR512RegClass.contains(Reg)) {
588 // ZMM#
589 if (!ST.hasAVX512())
590 continue;
591 XorOp = X86::VPXORYrr;
592 } else if (X86::VK1RegClass.contains(Reg) ||
593 X86::VK2RegClass.contains(Reg) ||
594 X86::VK4RegClass.contains(Reg) ||
595 X86::VK8RegClass.contains(Reg) ||
596 X86::VK16RegClass.contains(Reg)) {
597 if (!ST.hasVLX())
598 continue;
599 XorOp = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
600 } else {
601 continue;
602 }
603
604 BuildMI(MBB, MBBI, DL, TII.get(XorOp), Reg)
606 .addReg(Reg, RegState::Undef);
607 }
608}
609
610 void X86FrameLowering::emitStackProbe(
611 MachineFunction &MF, MachineBasicBlock &MBB,
612 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
613 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
614 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
615 if (STI.isTargetWindowsCoreCLR()) {
616 if (InProlog) {
617 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
618 .addImm(0 /* no explicit stack size */);
619 } else {
620 emitStackProbeInline(MF, MBB, MBBI, DL, false);
621 }
622 } else {
623 emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
624 }
625}
626
627 bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
628 return STI.isOSWindows() && !STI.isTargetWin64();
629}
630
631 void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
632 MachineBasicBlock &PrologMBB) const {
633 auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
634 return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
635 });
636 if (Where != PrologMBB.end()) {
637 DebugLoc DL = PrologMBB.findDebugLoc(Where);
638 emitStackProbeInline(MF, PrologMBB, Where, DL, true);
639 Where->eraseFromParent();
640 }
641}
642
643void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
644 MachineBasicBlock &MBB,
645 MachineBasicBlock::iterator MBBI,
646 const DebugLoc &DL,
647 bool InProlog) const {
648 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
649 if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
650 emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
651 else
652 emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
653}
654
655void X86FrameLowering::emitStackProbeInlineGeneric(
656 MachineFunction &MF, MachineBasicBlock &MBB,
657 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
658 MachineInstr &AllocWithProbe = *MBBI;
659 uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
660
663 assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
664 "different expansion expected for CoreCLR 64 bit");
665
666 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
667 uint64_t ProbeChunk = StackProbeSize * 8;
668
669 uint64_t MaxAlign =
670 TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
671
672 // Synthesize a loop or unroll it, depending on the number of iterations.
673 // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
674 // between the unaligned rsp and current rsp.
675 if (Offset > ProbeChunk) {
676 emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
677 MaxAlign % StackProbeSize);
678 } else {
679 emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
680 MaxAlign % StackProbeSize);
681 }
682}
683
684void X86FrameLowering::emitStackProbeInlineGenericBlock(
685 MachineFunction &MF, MachineBasicBlock &MBB,
686 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
687 uint64_t AlignOffset) const {
688
689 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
690 const bool HasFP = hasFP(MF);
691 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
692 const X86TargetLowering &TLI = *STI.getTargetLowering();
693 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
694 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
695
696 uint64_t CurrentOffset = 0;
697
698 assert(AlignOffset < StackProbeSize);
699
700 // If the offset is so small it fits within a page, there's nothing to do.
701 if (StackProbeSize < Offset + AlignOffset) {
702
703 uint64_t StackAdjustment = StackProbeSize - AlignOffset;
704 BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
706 if (!HasFP && NeedsDwarfCFI) {
707 BuildCFI(
708 MBB, MBBI, DL,
709 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
710 }
711
712 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
714 StackPtr, false, 0)
715 .addImm(0)
717 NumFrameExtraProbe++;
718 CurrentOffset = StackProbeSize - AlignOffset;
719 }
720
721 // For the next N - 1 pages, just probe. I tried to take advantage of
722 // natural probes but it implies much more logic and there was very few
723 // interesting natural probes to interleave.
724 while (CurrentOffset + StackProbeSize < Offset) {
725 BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
727
728 if (!HasFP && NeedsDwarfCFI) {
729 BuildCFI(
730 MBB, MBBI, DL,
731 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
732 }
733 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
735 StackPtr, false, 0)
736 .addImm(0)
738 NumFrameExtraProbe++;
739 CurrentOffset += StackProbeSize;
740 }
741
742 // No need to probe the tail, it is smaller than a Page.
743 uint64_t ChunkSize = Offset - CurrentOffset;
744 if (ChunkSize == SlotSize) {
745 // Use push for slot sized adjustments as a size optimization,
746 // like emitSPUpdate does when not probing.
747 unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
748 unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
749 BuildMI(MBB, MBBI, DL, TII.get(Opc))
752 } else {
753 BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
755 }
756 // No need to adjust Dwarf CFA offset here, the last position of the stack has
757 // been defined
758}
759
760void X86FrameLowering::emitStackProbeInlineGenericLoop(
761 MachineFunction &MF, MachineBasicBlock &MBB,
762 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
763 uint64_t AlignOffset) const {
764 assert(Offset && "null offset");
765
766 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
767 MachineBasicBlock::LQR_Live &&
768 "Inline stack probe loop will clobber live EFLAGS.");
769
770 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
771 const bool HasFP = hasFP(MF);
774 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
775 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
776
777 if (AlignOffset) {
778 if (AlignOffset < StackProbeSize) {
779 // Perform a first smaller allocation followed by a probe.
780 BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
782
783 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
785 StackPtr, false, 0)
786 .addImm(0)
788 NumFrameExtraProbe++;
789 Offset -= AlignOffset;
790 }
791 }
792
793 // Synthesize a loop
794 NumFrameLoopProbe++;
795 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
796
797 MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
798 MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
799
801 MF.insert(MBBIter, testMBB);
802 MF.insert(MBBIter, tailMBB);
803
804 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
805 : Is64Bit ? X86::R11D
806 : X86::EAX;
807
808 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
811
812 // save loop bound
813 {
814 const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
815 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
816 BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
817 .addReg(FinalStackProbed)
818 .addImm(BoundOffset)
820
821 // while in the loop, use loop-invariant reg for CFI,
822 // instead of the stack pointer, which changes during the loop
823 if (!HasFP && NeedsDwarfCFI) {
824 // x32 uses the same DWARF register numbers as x86-64,
825 // so there isn't a register number for r11d, we must use r11 instead
826 const Register DwarfFinalStackProbed =
828 ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
829 : FinalStackProbed;
830
833 nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
835 MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
836 }
837 }
838
839 // allocate a page
840 BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
841 /*InEpilogue=*/false)
843
844 // touch the page
845 addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
847 StackPtr, false, 0)
848 .addImm(0)
850
851 // cmp with stack pointer bound
852 BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
854 .addReg(FinalStackProbed)
856
857 // jump
858 BuildMI(testMBB, DL, TII.get(X86::JCC_1))
859 .addMBB(testMBB)
862 testMBB->addSuccessor(testMBB);
863 testMBB->addSuccessor(tailMBB);
864
865 // BB management
866 tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
868 MBB.addSuccessor(testMBB);
869
870 // handle tail
871 const uint64_t TailOffset = Offset % StackProbeSize;
872 MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
873 if (TailOffset) {
874 BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
875 /*InEpilogue=*/false)
877 }
878
879 // after the loop, switch back to stack pointer for CFI
880 if (!HasFP && NeedsDwarfCFI) {
881 // x32 uses the same DWARF register numbers as x86-64,
882 // so there isn't a register number for esp, we must use rsp instead
883 const Register DwarfStackPtr =
887
888 BuildCFI(*tailMBB, TailMBBIter, DL,
890 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
891 }
892
893 // Update Live In information
894 recomputeLiveIns(*testMBB);
895 recomputeLiveIns(*tailMBB);
896}
897
898void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
899 MachineFunction &MF, MachineBasicBlock &MBB,
900 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
902 assert(STI.is64Bit() && "different expansion needed for 32 bit");
903 assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
905 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
906
907 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
908 MachineBasicBlock::LQR_Live &&
909 "Inline stack probe loop will clobber live EFLAGS.");
910
911 // RAX contains the number of bytes of desired stack adjustment.
912 // The handling here assumes this value has already been updated so as to
913 // maintain stack alignment.
914 //
915 // We need to exit with RSP modified by this amount and execute suitable
916 // page touches to notify the OS that we're growing the stack responsibly.
917 // All stack probing must be done without modifying RSP.
918 //
919 // MBB:
920 // SizeReg = RAX;
921 // ZeroReg = 0
922 // CopyReg = RSP
923 // Flags, TestReg = CopyReg - SizeReg
924 // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
925 // LimitReg = gs magic thread env access
926 // if FinalReg >= LimitReg goto ContinueMBB
927 // RoundBB:
928 // RoundReg = page address of FinalReg
929 // LoopMBB:
930 // LoopReg = PHI(LimitReg,ProbeReg)
931 // ProbeReg = LoopReg - PageSize
932 // [ProbeReg] = 0
933 // if (ProbeReg > RoundReg) goto LoopMBB
934 // ContinueMBB:
935 // RSP = RSP - RAX
936 // [rest of original MBB]
937
938 // Set up the new basic blocks
939 MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
940 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
941 MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
942
943 MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
944 MF.insert(MBBIter, RoundMBB);
945 MF.insert(MBBIter, LoopMBB);
946 MF.insert(MBBIter, ContinueMBB);
947
948 // Split MBB and move the tail portion down to ContinueMBB.
949 MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
950 ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
952
953 // Some useful constants
954 const int64_t ThreadEnvironmentStackLimit = 0x10;
955 const int64_t PageSize = 0x1000;
956 const int64_t PageMask = ~(PageSize - 1);
957
958 // Registers we need. For the normal case we use virtual
959 // registers. For the prolog expansion we use RAX, RCX and RDX.
961 const TargetRegisterClass *RegClass = &X86::GR64RegClass;
962 const Register SizeReg = InProlog ? X86::RAX
963 : MRI.createVirtualRegister(RegClass),
964 ZeroReg = InProlog ? X86::RCX
965 : MRI.createVirtualRegister(RegClass),
966 CopyReg = InProlog ? X86::RDX
967 : MRI.createVirtualRegister(RegClass),
968 TestReg = InProlog ? X86::RDX
969 : MRI.createVirtualRegister(RegClass),
970 FinalReg = InProlog ? X86::RDX
971 : MRI.createVirtualRegister(RegClass),
972 RoundedReg = InProlog ? X86::RDX
973 : MRI.createVirtualRegister(RegClass),
974 LimitReg = InProlog ? X86::RCX
975 : MRI.createVirtualRegister(RegClass),
976 JoinReg = InProlog ? X86::RCX
977 : MRI.createVirtualRegister(RegClass),
978 ProbeReg = InProlog ? X86::RCX
979 : MRI.createVirtualRegister(RegClass);
980
981 // SP-relative offsets where we can save RCX and RDX.
982 int64_t RCXShadowSlot = 0;
983 int64_t RDXShadowSlot = 0;
984
985 // If inlining in the prolog, save RCX and RDX.
986 if (InProlog) {
987 // Compute the offsets. We need to account for things already
988 // pushed onto the stack at this point: return address, frame
989 // pointer (if used), and callee saves.
991 const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
992 const bool HasFP = hasFP(MF);
993
994 // Check if we need to spill RCX and/or RDX.
995 // Here we assume that no earlier prologue instruction changes RCX and/or
996 // RDX, so checking the block live-ins is enough.
997 const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
998 const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
999 int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
1000 // Assign the initial slot to both registers, then change RDX's slot if both
1001 // need to be spilled.
1002 if (IsRCXLiveIn)
1003 RCXShadowSlot = InitSlot;
1004 if (IsRDXLiveIn)
1005 RDXShadowSlot = InitSlot;
1006 if (IsRDXLiveIn && IsRCXLiveIn)
1007 RDXShadowSlot += 8;
1008 // Emit the saves if needed.
1009 if (IsRCXLiveIn)
1010 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1011 RCXShadowSlot)
1012 .addReg(X86::RCX);
1013 if (IsRDXLiveIn)
1014 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1015 RDXShadowSlot)
1016 .addReg(X86::RDX);
1017 } else {
1018 // Not in the prolog. Copy RAX to a virtual reg.
1019 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
1020 }
1021
1022 // Add code to MBB to check for overflow and set the new target stack pointer
1023 // to zero if so.
1024 BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
1025 .addReg(ZeroReg, RegState::Undef)
1026 .addReg(ZeroReg, RegState::Undef);
1027 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
1028 BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
1029 .addReg(CopyReg)
1030 .addReg(SizeReg);
1031 BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
1032 .addReg(TestReg)
1033 .addReg(ZeroReg)
1035
1036 // FinalReg now holds final stack pointer value, or zero if
1037 // allocation would overflow. Compare against the current stack
1038 // limit from the thread environment block. Note this limit is the
1039 // lowest touched page on the stack, not the point at which the OS
1040 // will cause an overflow exception, so this is just an optimization
1041 // to avoid unnecessarily touching pages that are below the current
1042 // SP but already committed to the stack by the OS.
1043 BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
1044 .addReg(0)
1045 .addImm(1)
1046 .addReg(0)
1047 .addImm(ThreadEnvironmentStackLimit)
1048 .addReg(X86::GS);
1049 BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
1050 // Jump if the desired stack pointer is at or above the stack limit.
1051 BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);
1052
1053 // Add code to roundMBB to round the final stack pointer to a page boundary.
1054 RoundMBB->addLiveIn(FinalReg);
1055 BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
1056 .addReg(FinalReg)
1057 .addImm(PageMask);
1058 BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
1059
1060 // LimitReg now holds the current stack limit, RoundedReg page-rounded
1061 // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
1062 // and probe until we reach RoundedReg.
1063 if (!InProlog) {
1064 BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
1065 .addReg(LimitReg)
1066 .addMBB(RoundMBB)
1067 .addReg(ProbeReg)
1068 .addMBB(LoopMBB);
1069 }
1070
1071 LoopMBB->addLiveIn(JoinReg);
1072 addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
1073 false, -PageSize);
1074
1075 // Probe by storing a byte onto the stack.
1076 BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
1077 .addReg(ProbeReg)
1078 .addImm(1)
1079 .addReg(0)
1080 .addImm(0)
1081 .addReg(0)
1082 .addImm(0);
1083
1084 LoopMBB->addLiveIn(RoundedReg);
1085 BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
1086 .addReg(RoundedReg)
1087 .addReg(ProbeReg);
1088 BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);
1089
1090 MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
1091
1092 // If in prolog, restore RDX and RCX.
1093 if (InProlog) {
1094 if (RCXShadowSlot) // It means we spilled RCX in the prologue.
1095 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1096 TII.get(X86::MOV64rm), X86::RCX),
1097 X86::RSP, false, RCXShadowSlot);
1098 if (RDXShadowSlot) // It means we spilled RDX in the prologue.
1099 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1100 TII.get(X86::MOV64rm), X86::RDX),
1101 X86::RSP, false, RDXShadowSlot);
1102 }
1103
1104 // Now that the probing is done, add code to continueMBB to update
1105 // the stack pointer for real.
1106 ContinueMBB->addLiveIn(SizeReg);
1107 BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
1108 .addReg(X86::RSP)
1109 .addReg(SizeReg);
1110
1111 // Add the control flow edges we need.
1112 MBB.addSuccessor(ContinueMBB);
1113 MBB.addSuccessor(RoundMBB);
1114 RoundMBB->addSuccessor(LoopMBB);
1115 LoopMBB->addSuccessor(ContinueMBB);
1116 LoopMBB->addSuccessor(LoopMBB);
1117
1118 // Mark all the instructions added to the prolog as frame setup.
1119 if (InProlog) {
1120 for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
1121 BeforeMBBI->setFlag(MachineInstr::FrameSetup);
1122 }
1123 for (MachineInstr &MI : *RoundMBB) {
1125 }
1126 for (MachineInstr &MI : *LoopMBB) {
1128 }
1129 for (MachineInstr &MI :
1130 llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
1132 }
1133 }
1134}
1135
1136void X86FrameLowering::emitStackProbeCall(
1137 MachineFunction &MF, MachineBasicBlock &MBB,
1138 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
1139 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
1140 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
1141
1142 // FIXME: Add indirect thunk support and remove this.
1143 if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
1144 report_fatal_error("Emitting stack probe calls on 64-bit with the large "
1145 "code model and indirect thunks not yet implemented.");
1146
1147 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
1148 MachineBasicBlock::LQR_Live &&
1149 "Stack probe calls will clobber live EFLAGS.");
1150
1151 unsigned CallOp;
1152 if (Is64Bit)
1153 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
1154 else
1155 CallOp = X86::CALLpcrel32;
1156
1158
1159 MachineInstrBuilder CI;
1160 MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
1161
1162 // All current stack probes take AX and SP as input, clobber flags, and
1163 // preserve all registers. x86_64 probes leave RSP unmodified.
1165 // For the large code model, we have to call through a register. Use R11,
1166 // as it is scratch in all supported calling conventions.
1167 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
1169 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
1170 } else {
1171 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
1173 }
1174
1175 unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
1176 unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
1177 CI.addReg(AX, RegState::Implicit)
1178 .addReg(SP, RegState::Implicit)
1179 .addReg(AX, RegState::Define | RegState::Implicit)
1180 .addReg(SP, RegState::Define | RegState::Implicit)
1181 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
1182
1183 MachineInstr *ModInst = CI;
1184 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1185 // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
1186 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
1187 // themselves. They also does not clobber %rax so we can reuse it when
1188 // adjusting %rsp.
1189 // All other platforms do not specify a particular ABI for the stack probe
1190 // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
1191 ModInst =
1192 BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Is64Bit)), SP)
1193 .addReg(SP)
1194 .addReg(AX);
1195 }
1196
1197 // DebugInfo variable locations -- if there's an instruction number for the
1198 // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
1199 // modifies SP.
1200 if (InstrNum) {
1201 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1202 // Label destination operand of the subtract.
1203 MF.makeDebugValueSubstitution(*InstrNum,
1204 {ModInst->getDebugInstrNum(), 0});
1205 } else {
1206 // Label the call. The operand number is the penultimate operand, zero
1207 // based.
1208 unsigned SPDefOperand = ModInst->getNumOperands() - 2;
1209 MF.makeDebugValueSubstitution(
1210 *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
1211 }
1212 }
1213
1214 if (InProlog) {
1215 // Apply the frame setup flag to all inserted instrs.
1216 for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
1217 ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
1218 }
1219}
1220
1221static unsigned calculateSetFPREG(uint64_t SPAdjust) {
1222 // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
1223 // and might require smaller successive adjustments.
1224 const uint64_t Win64MaxSEHOffset = 128;
1225 uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
1226 // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
1227 return SEHFrameOffset & -16;
1228}
1229
1230// If we're forcing a stack realignment we can't rely on just the frame
1231// info, we need to know the ABI stack alignment as well in case we
1232// have a call out. Otherwise just make sure we have some alignment - we'll
1233// go with the minimum SlotSize.
1234uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
1235 const MachineFrameInfo &MFI = MF.getFrameInfo();
1236 Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
1237 Align StackAlign = getStackAlign();
1238 bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
1239 if (HasRealign) {
1240 if (MFI.hasCalls())
1241 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
1242 else if (MaxAlign < SlotSize)
1243 MaxAlign = Align(SlotSize);
1244 }
1245
1246 if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
1247 if (HasRealign)
1248 MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
1249 else
1250 MaxAlign = Align(16);
1251 }
1252 return MaxAlign.value();
1253}
1254
1255void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
1256 MachineBasicBlock::iterator MBBI,
1257 const DebugLoc &DL, unsigned Reg,
1258 uint64_t MaxAlign) const {
1259 uint64_t Val = -MaxAlign;
1260 unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
1261
1262 MachineFunction &MF = *MBB.getParent();
1264 const X86TargetLowering &TLI = *STI.getTargetLowering();
1265 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
1266 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
1267
1268 // We want to make sure that (in worst case) less than StackProbeSize bytes
1269 // are not probed after the AND. This assumption is used in
1270 // emitStackProbeInlineGeneric.
1271 if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
1272 {
1273 NumFrameLoopProbe++;
1274 MachineBasicBlock *entryMBB =
1276 MachineBasicBlock *headMBB =
1278 MachineBasicBlock *bodyMBB =
1280 MachineBasicBlock *footMBB =
1282
1284 MF.insert(MBBIter, entryMBB);
1285 MF.insert(MBBIter, headMBB);
1286 MF.insert(MBBIter, bodyMBB);
1287 MF.insert(MBBIter, footMBB);
1288 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
1289 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
1290 : Is64Bit ? X86::R11D
1291 : X86::EAX;
1292
1293 // Setup entry block
1294 {
1295
1296 entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
1297 BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
1300 MachineInstr *MI =
1301 BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
1302 .addReg(FinalStackProbed)
1303 .addImm(Val)
1305
1306 // The EFLAGS implicit def is dead.
1307 MI->getOperand(3).setIsDead();
1308
1309 BuildMI(entryMBB, DL,
1310 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1311 .addReg(FinalStackProbed)
1314 BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
1315 .addMBB(&MBB)
1318 entryMBB->addSuccessor(headMBB);
1319 entryMBB->addSuccessor(&MBB);
1320 }
1321
1322 // Loop entry block
1323
1324 {
1325 const unsigned SUBOpc =
1327 BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
1329 .addImm(StackProbeSize)
1331
1332 BuildMI(headMBB, DL,
1333 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1335 .addReg(FinalStackProbed)
1337
1338 // jump to the footer if StackPtr < FinalStackProbed
1339 BuildMI(headMBB, DL, TII.get(X86::JCC_1))
1340 .addMBB(footMBB)
1343
1344 headMBB->addSuccessor(bodyMBB);
1345 headMBB->addSuccessor(footMBB);
1346 }
1347
1348 // setup loop body
1349 {
1350 addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
1352 StackPtr, false, 0)
1353 .addImm(0)
1355
1356 const unsigned SUBOpc =
1358 BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
1360 .addImm(StackProbeSize)
1362
1363 // cmp with stack pointer bound
1364 BuildMI(bodyMBB, DL,
1365 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1366 .addReg(FinalStackProbed)
1369
1370 // jump back while FinalStackProbed < StackPtr
1371 BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
1372 .addMBB(bodyMBB)
1375 bodyMBB->addSuccessor(bodyMBB);
1376 bodyMBB->addSuccessor(footMBB);
1377 }
1378
1379 // setup loop footer
1380 {
1381 BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
1382 .addReg(FinalStackProbed)
1384 addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
1386 StackPtr, false, 0)
1387 .addImm(0)
1389 footMBB->addSuccessor(&MBB);
1390 }
1391
1392 recomputeLiveIns(*headMBB);
1393 recomputeLiveIns(*bodyMBB);
1394 recomputeLiveIns(*footMBB);
1396 }
1397 } else {
1398 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
1399 .addReg(Reg)
1400 .addImm(Val)
1402
1403 // The EFLAGS implicit def is dead.
1404 MI->getOperand(3).setIsDead();
1405 }
1406}
1407
1408 bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
1409 // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
1410 // clobbered by any interrupt handler.
1411 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1412 "MF used frame lowering for wrong subtarget");
1413 const Function &Fn = MF.getFunction();
1414 const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
1415 return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
1416}
1417
1418/// Return true if we need to use the restricted Windows x64 prologue and
1419/// epilogue code patterns that can be described with WinCFI (.seh_*
1420/// directives).
1421bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
1422 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
1423}
1424
1425bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
1426 return !isWin64Prologue(MF) && MF.needsFrameMoves();
1427}
1428
1429/// emitPrologue - Push callee-saved registers onto the stack, which
1430 /// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
1431/// space for local variables. Also emit labels used by the exception handler to
1432/// generate the exception handling frames.
1433
1434/*
1435 Here's a gist of what gets emitted:
1436
1437 ; Establish frame pointer, if needed
1438 [if needs FP]
1439 push %rbp
1440 .cfi_def_cfa_offset 16
1441 .cfi_offset %rbp, -16
1442 .seh_pushreg %rpb
1443 mov %rsp, %rbp
1444 .cfi_def_cfa_register %rbp
1445
1446 ; Spill general-purpose registers
1447 [for all callee-saved GPRs]
1448 pushq %<reg>
1449 [if not needs FP]
1450 .cfi_def_cfa_offset (offset from RETADDR)
1451 .seh_pushreg %<reg>
1452
1453 ; If the required stack alignment > default stack alignment
1454 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
1455 ; of unknown size in the stack frame.
1456 [if stack needs re-alignment]
1457 and $MASK, %rsp
1458
1459 ; Allocate space for locals
1460 [if target is Windows and allocated space > 4096 bytes]
1461 ; Windows needs special care for allocations larger
1462 ; than one page.
1463 mov $NNN, %rax
1464 call ___chkstk_ms/___chkstk
1465 sub %rax, %rsp
1466 [else]
1467 sub $NNN, %rsp
1468
1469 [if needs FP]
1470 .seh_stackalloc (size of XMM spill slots)
1471 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
1472 [else]
1473 .seh_stackalloc NNN
1474
1475 ; Spill XMMs
1476 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
1477 ; they may get spilled on any platform, if the current function
1478 ; calls @llvm.eh.unwind.init
1479 [if needs FP]
1480 [for all callee-saved XMM registers]
1481 movaps %<xmm reg>, -MMM(%rbp)
1482 [for all callee-saved XMM registers]
1483 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
1484 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
1485 [else]
1486 [for all callee-saved XMM registers]
1487 movaps %<xmm reg>, KKK(%rsp)
1488 [for all callee-saved XMM registers]
1489 .seh_savexmm %<xmm reg>, KKK
1490
1491 .seh_endprologue
1492
1493 [if needs base pointer]
1494 mov %rsp, %rbx
1495 [if needs to restore base pointer]
1496 mov %rsp, -MMM(%rbp)
1497
1498 ; Emit CFI info
1499 [if needs FP]
1500 [for all callee-saved registers]
1501 .cfi_offset %<reg>, (offset from %rbp)
1502 [else]
1503 .cfi_def_cfa_offset (offset from RETADDR)
1504 [for all callee-saved registers]
1505 .cfi_offset %<reg>, (offset from %rsp)
1506
1507 Notes:
1508 - .seh directives are emitted only for Windows 64 ABI
1509 - .cv_fpo directives are emitted on win32 when emitting CodeView
1510 - .cfi directives are emitted for all other ABIs
1511 - for 32-bit code, substitute %e?? registers for %r??
1512*/
1513
1514 void X86FrameLowering::emitPrologue(MachineFunction &MF,
1515 MachineBasicBlock &MBB) const {
1516 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1517 "MF used frame lowering for wrong subtarget");
1518 MachineBasicBlock::iterator MBBI = MBB.begin();
1519 MachineFrameInfo &MFI = MF.getFrameInfo();
1520 const Function &Fn = MF.getFunction();
1521 MachineModuleInfo &MMI = MF.getMMI();
1522 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1523 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1524 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1525 bool IsFunclet = MBB.isEHFuncletEntry();
1526 EHPersonality Personality = EHPersonality::Unknown;
1527 if (Fn.hasPersonalityFn())
1528 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1529 bool FnHasClrFunclet =
1530 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1531 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1532 bool HasFP = hasFP(MF);
1533 bool IsWin64Prologue = isWin64Prologue(MF);
1534 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1535 // FIXME: Emit FPO data for EH funclets.
1536 bool NeedsWinFPO =
1537 !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
1538 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1539 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1540 Register FramePtr = TRI->getFrameRegister(MF);
1541 const Register MachineFramePtr =
1542 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
1543 : FramePtr;
1544 Register BasePtr = TRI->getBaseRegister();
1545 bool HasWinCFI = false;
1546
1547 // Debug location must be unknown since the first debug location is used
1548 // to determine the end of the prologue.
1549 DebugLoc DL;
1550 Register ArgBaseReg;
1551
1552 // Emit extra prolog for argument stack slot reference.
1553 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1554 // MI is the LEA instruction created in X86ArgumentStackSlotPass.
1555 // Create extra prolog for stack realignment.
1556 ArgBaseReg = MI->getOperand(0).getReg();
1557 // leal 4(%esp), %basereg
1558 // .cfi_def_cfa %basereg, 0
1559 // andl $-128, %esp
1560 // pushl -4(%basereg)
1561 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1562 ArgBaseReg)
1564 .addImm(1)
1565 .addUse(X86::NoRegister)
1567 .addUse(X86::NoRegister)
1569 if (NeedsDwarfCFI) {
1570 // .cfi_def_cfa %basereg, 0
1571 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1572 BuildCFI(MBB, MBBI, DL,
1573 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1575 }
1576 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1577 int64_t Offset = -(int64_t)SlotSize;
1578 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm: X86::PUSH32rmm))
1579 .addReg(ArgBaseReg)
1580 .addImm(1)
1581 .addReg(X86::NoRegister)
1582 .addImm(Offset)
1583 .addReg(X86::NoRegister)
1585 }
1586
1587 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1588 // tail call.
1589 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1590 if (TailCallArgReserveSize && IsWin64Prologue)
1591 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1592
1593 const bool EmitStackProbeCall =
1594 STI.getTargetLowering()->hasStackProbeSymbol(MF);
1595 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1596
1597 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1598 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1599 case SwiftAsyncFramePointerMode::DeploymentBased:
1600 if (STI.swiftAsyncContextIsDynamicallySet()) {
1601 // The special symbol below is absolute and has a *value* suitable to be
1602 // combined with the frame pointer directly.
1603 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1604 .addUse(MachineFramePtr)
1605 .addUse(X86::RIP)
1606 .addImm(1)
1607 .addUse(X86::NoRegister)
1608 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1610 .addUse(X86::NoRegister);
1611 break;
1612 }
1613 [[fallthrough]];
1614
1616 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1617 .addUse(MachineFramePtr)
1618 .addImm(60)
1620 break;
1621
1623 break;
1624 }
1625 }
1626
1627 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1628 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1629 // stack alignment.
1630 if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit &&
1631 Fn.arg_size() == 2) {
1632 StackSize += 8;
1633 MFI.setStackSize(StackSize);
1634
1635 // Update the stack pointer by pushing a register. This is the instruction
1636 // that would end up being emitted by a call to `emitSPUpdate`.
1637 // Hard-coding the update to a push avoids emitting a second
1638 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1639 // probing isn't needed anyways for an 8-byte update.
1640 // Pushing a register leaves us in a similar situation to a regular
1641 // function call where we know that the address at (rsp-8) is writeable.
1642 // That way we avoid any off-by-ones with stack probing for additional
1643 // stack pointer updates later on.
1644 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1645 .addReg(X86::RAX, RegState::Undef)
1647 }
1648
1649 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1650 // function, and use up to 128 bytes of stack space, don't have a frame
1651 // pointer, calls, or dynamic alloca then we do not need to adjust the
1652 // stack pointer (we fit in the Red Zone). We also check that we don't
1653 // push and pop from the stack.
1654 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1655 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1656 !MFI.adjustsStack() && // No calls.
1657 !EmitStackProbeCall && // No stack probes.
1658 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1659 !MF.shouldSplitStack()) { // Regular stack
1660 uint64_t MinSize =
1662 if (HasFP) MinSize += SlotSize;
1663 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1664 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1665 MFI.setStackSize(StackSize);
1666 }
1667
1668 // Insert stack pointer adjustment for later moving of return addr. Only
1669 // applies to tail call optimized functions where the callee argument stack
1670 // size is bigger than the callers.
1671 if (TailCallArgReserveSize != 0) {
1672 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1673 /*InEpilogue=*/false)
1675 }
1676
1677 // Mapping for machine moves:
1678 //
1679 // DST: VirtualFP AND
1680 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1681 // ELSE => DW_CFA_def_cfa
1682 //
1683 // SRC: VirtualFP AND
1684 // DST: Register => DW_CFA_def_cfa_register
1685 //
1686 // ELSE
1687 // OFFSET < 0 => DW_CFA_offset_extended_sf
1688 // REG < 64 => DW_CFA_offset + Reg
1689 // ELSE => DW_CFA_offset_extended
1690
1691 uint64_t NumBytes = 0;
1692 int stackGrowth = -SlotSize;
1693
1694 // Find the funclet establisher parameter
1695 Register Establisher = X86::NoRegister;
1696 if (IsClrFunclet)
1697 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1698 else if (IsFunclet)
1699 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1700
1701 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1702 // Immediately spill establisher into the home slot.
1703 // The runtime cares about this.
1704 // MOV64mr %rdx, 16(%rsp)
1705 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1706 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1707 .addReg(Establisher)
1709 MBB.addLiveIn(Establisher);
1710 }
1711
1712 if (HasFP) {
1713 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1714
1715 // Calculate required stack adjustment.
1716 uint64_t FrameSize = StackSize - SlotSize;
1717 NumBytes = FrameSize -
1718 (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1719
1720 // Callee-saved registers are pushed on stack before the stack is realigned.
1721 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1722 NumBytes = alignTo(NumBytes, MaxAlign);
1723
1724 // Save EBP/RBP into the appropriate stack slot.
1725 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
1726 .addReg(MachineFramePtr, RegState::Kill)
1728
1729 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1730 // Mark the place where EBP/RBP was saved.
1731 // Define the current CFA rule to use the provided offset.
1732 assert(StackSize);
1733 BuildCFI(MBB, MBBI, DL,
1735 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1737
1738 // Change the rule for the FramePtr to be an "offset" rule.
1739 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1740 BuildCFI(MBB, MBBI, DL,
1741 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1742 2 * stackGrowth -
1743 (int)TailCallArgReserveSize),
1745 }
1746
1747 if (NeedsWinCFI) {
1748 HasWinCFI = true;
1749 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1752 }
1753
1754 if (!IsFunclet) {
1755 if (X86FI->hasSwiftAsyncContext()) {
1756 const auto &Attrs = MF.getFunction().getAttributes();
1757
1758 // Before we update the live frame pointer we have to ensure there's a
1759 // valid (or null) asynchronous context in its slot just before FP in
1760 // the frame record, so store it now.
1761 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1762 // We have an initial context in r14, store it just before the frame
1763 // pointer.
1764 MBB.addLiveIn(X86::R14);
1765 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1766 .addReg(X86::R14)
1768 } else {
1769 // No initial context, store null so that there's no pointer that
1770 // could be misused.
1771 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1772 .addImm(0)
1774 }
1775
1776 if (NeedsWinCFI) {
1777 HasWinCFI = true;
1778 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1779 .addImm(X86::R14)
1781 }
1782
1783 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1784 .addUse(X86::RSP)
1785 .addImm(1)
1786 .addUse(X86::NoRegister)
1787 .addImm(8)
1788 .addUse(X86::NoRegister)
1790 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1791 .addUse(X86::RSP)
1792 .addImm(8)
1794 }
1795
1796 if (!IsWin64Prologue && !IsFunclet) {
1797 // Update EBP with the new base value.
1798 if (!X86FI->hasSwiftAsyncContext())
1799 BuildMI(MBB, MBBI, DL,
1800 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1801 FramePtr)
1804
1805 if (NeedsDwarfCFI) {
1806 if (ArgBaseReg.isValid()) {
1807 SmallString<64> CfaExpr;
1808 CfaExpr.push_back(dwarf::DW_CFA_expression);
1809 uint8_t buffer[16];
1810 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1811 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1812 CfaExpr.push_back(2);
1813 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1814 CfaExpr.push_back(0);
1815 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1816 BuildCFI(MBB, MBBI, DL,
1817 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1818 MachineInstr::FrameSetup);
1819 } else {
1820 // Mark effective beginning of when frame pointer becomes valid.
1821 // Define the current CFA to use the EBP/RBP register.
1822 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1823 BuildCFI(
1824 MBB, MBBI, DL,
1825 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1826 MachineInstr::FrameSetup);
1827 }
1828 }
1829
1830 if (NeedsWinFPO) {
1831 // .cv_fpo_setframe $FramePtr
1832 HasWinCFI = true;
1833 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1834 .addImm(FramePtr)
1835 .addImm(0)
1836 .setMIFlag(MachineInstr::FrameSetup);
1837 }
1838 }
1839 }
1840 } else {
1841 assert(!IsFunclet && "funclets without FPs not yet implemented");
1842 NumBytes = StackSize -
1843 (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1844 }
1845
1846 // Update the offset adjustment, which is mainly used by codeview to translate
1847 // from ESP to VFRAME relative local variable offsets.
1848 if (!IsFunclet) {
1849 if (HasFP && TRI->hasStackRealignment(MF))
1850 MFI.setOffsetAdjustment(-NumBytes);
1851 else
1852 MFI.setOffsetAdjustment(-StackSize);
1853 }
1854
1855 // For EH funclets, only allocate enough space for outgoing calls. Save the
1856 // NumBytes value that we would've used for the parent frame.
1857 unsigned ParentFrameNumBytes = NumBytes;
1858 if (IsFunclet)
1859 NumBytes = getWinEHFuncletFrameSize(MF);
1860
1861 // Skip the callee-saved push instructions.
1862 bool PushedRegs = false;
1863 int StackOffset = 2 * stackGrowth;
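// StackOffset starts at 2 * stackGrowth (-16 on x86-64): the CFA offset recorded
// for the first callee-saved push covers the return address slot plus that push.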
1864
1865 while (MBBI != MBB.end() &&
1866 MBBI->getFlag(MachineInstr::FrameSetup) &&
1867 (MBBI->getOpcode() == X86::PUSH32r ||
1868 MBBI->getOpcode() == X86::PUSH64r)) {
1869 PushedRegs = true;
1870 Register Reg = MBBI->getOperand(0).getReg();
1871 ++MBBI;
1872
1873 if (!HasFP && NeedsDwarfCFI) {
1874 // Mark callee-saved push instruction.
1875 // Define the current CFA rule to use the provided offset.
1876 assert(StackSize);
1877 BuildCFI(MBB, MBBI, DL,
1878 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1879 MachineInstr::FrameSetup);
1880 StackOffset += stackGrowth;
1881 }
1882
1883 if (NeedsWinCFI) {
1884 HasWinCFI = true;
1885 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1886 .addImm(Reg)
1887 .setMIFlag(MachineInstr::FrameSetup);
1888 }
1889 }
1890
1891 // Realign stack after we pushed callee-saved registers (so that we'll be
1892 // able to calculate their offsets from the frame pointer).
1893 // Don't do this for Win64, it needs to realign the stack after the prologue.
1894 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1895 !ArgBaseReg.isValid()) {
1896 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1897 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1898
1899 if (NeedsWinCFI) {
1900 HasWinCFI = true;
1901 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1902 .addImm(MaxAlign)
1903 .setMIFlag(MachineInstr::FrameSetup);
1904 }
1905 }
1906
1907 // If there is a SUB32ri of ESP immediately before this instruction, merge
1908 // the two. This can be the case when tail call elimination is enabled and
1909 // the callee has more arguments than the caller.
1910 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1911
1912 // Adjust stack pointer: ESP -= numbytes.
1913
1914 // Windows and cygwin/mingw require a prologue helper routine when allocating
1915 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1916 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1917 // stack and adjust the stack pointer in one go. The 64-bit version of
1918 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1919 // responsible for adjusting the stack pointer. Touching the stack at 4K
1920 // increments is necessary to ensure that the guard pages used by the OS
1921 // virtual memory manager are allocated in correct sequence.
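// For example, a Win64 frame that needs 0x2000 bytes of locals is typically
// lowered to: mov $0x2000, %rax ; call __chkstk ; sub %rax, %rsp
// with the register move emitted below and the call/sub produced by
// emitStackProbe.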
1922 uint64_t AlignedNumBytes = NumBytes;
1923 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1924 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1925 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1926 assert(!X86FI->getUsesRedZone() &&
1927 "The Red Zone is not accounted for in stack probes");
1928
1929 // Check whether EAX is livein for this block.
1930 bool isEAXAlive = isEAXLiveIn(MBB);
1931
1932 if (isEAXAlive) {
1933 if (Is64Bit) {
1934 // Save RAX
1935 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1936 .addReg(X86::RAX, RegState::Kill)
1937 .setMIFlag(MachineInstr::FrameSetup);
1938 } else {
1939 // Save EAX
1940 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1941 .addReg(X86::EAX, RegState::Kill)
1942 .setMIFlag(MachineInstr::FrameSetup);
1943 }
1944 }
1945
1946 if (Is64Bit) {
1947 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1948 // Function prologue is responsible for adjusting the stack pointer.
1949 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1950 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1951 .addImm(Alloc)
1952 .setMIFlag(MachineInstr::FrameSetup);
1953 } else {
1954 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1955 // We'll also use 4 already allocated bytes for EAX.
1956 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1957 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1958 .setMIFlag(MachineInstr::FrameSetup);
1959 }
1960
1961 // Call __chkstk, __chkstk_ms, or __alloca.
1962 emitStackProbe(MF, MBB, MBBI, DL, true);
1963
1964 if (isEAXAlive) {
1965 // Restore RAX/EAX
1966 MachineInstr *MI;
1967 if (Is64Bit)
1968 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
1969 StackPtr, false, NumBytes - 8);
1970 else
1971 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
1972 StackPtr, false, NumBytes - 4);
1973 MI->setFlag(MachineInstr::FrameSetup);
1974 MBB.insert(MBBI, MI);
1975 }
1976 } else if (NumBytes) {
1977 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
1978 }
1979
1980 if (NeedsWinCFI && NumBytes) {
1981 HasWinCFI = true;
1982 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
1983 .addImm(NumBytes)
1984 .setMIFlag(MachineInstr::FrameSetup);
1985 }
1986
1987 int SEHFrameOffset = 0;
1988 unsigned SPOrEstablisher;
1989 if (IsFunclet) {
1990 if (IsClrFunclet) {
1991 // The establisher parameter passed to a CLR funclet is actually a pointer
1992 // to the (mostly empty) frame of its nearest enclosing funclet; we have
1993 // to find the root function establisher frame by loading the PSPSym from
1994 // the intermediate frame.
1995 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
1996 MachinePointerInfo NoInfo;
1997 MBB.addLiveIn(Establisher);
1998 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
1999 Establisher, false, PSPSlotOffset)
2000 .addMemOperand(MF.getMachineMemOperand(
2001 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2002 ;
2003 // Save the root establisher back into the current funclet's (mostly
2004 // empty) frame, in case a sub-funclet or the GC needs it.
2005 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2006 false, PSPSlotOffset)
2007 .addReg(Establisher)
2008 .addMemOperand(MF.getMachineMemOperand(
2009 NoInfo,
2010 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2011 SlotSize, Align(SlotSize)));
2012 }
2013 SPOrEstablisher = Establisher;
2014 } else {
2015 SPOrEstablisher = StackPtr;
2016 }
2017
2018 if (IsWin64Prologue && HasFP) {
2019 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2020 // this calculation on the incoming establisher, which holds the value of
2021 // RSP from the parent frame at the end of the prologue.
2022 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2023 if (SEHFrameOffset)
2024 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2025 SPOrEstablisher, false, SEHFrameOffset);
2026 else
2027 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2028 .addReg(SPOrEstablisher);
2029
2030 // If this is not a funclet, emit the CFI describing our frame pointer.
2031 if (NeedsWinCFI && !IsFunclet) {
2032 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2033 HasWinCFI = true;
2034 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2035 .addImm(FramePtr)
2036 .addImm(SEHFrameOffset)
2037 .setMIFlag(MachineInstr::FrameSetup);
2038 if (isAsynchronousEHPersonality(Personality))
2039 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2040 }
2041 } else if (IsFunclet && STI.is32Bit()) {
2042 // Reset EBP / ESI to something good for funclets.
2043 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2044 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2045 // into the registration node so that the runtime will restore it for us.
2046 if (!MBB.isCleanupFuncletEntry()) {
2047 assert(Personality == EHPersonality::MSVC_CXX);
2048 Register FrameReg;
2049 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2050 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2051 // ESP is the first field, so no extra displacement is needed.
2052 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2053 false, EHRegOffset)
2054 .addReg(X86::ESP);
2055 }
2056 }
2057
2058 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2059 const MachineInstr &FrameInstr = *MBBI;
2060 ++MBBI;
2061
2062 if (NeedsWinCFI) {
2063 int FI;
2064 if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2065 if (X86::FR64RegClass.contains(Reg)) {
2066 int Offset;
2067 Register IgnoredFrameReg;
2068 if (IsWin64Prologue && IsFunclet)
2069 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2070 else
2071 Offset =
2072 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2073 SEHFrameOffset;
2074
2075 HasWinCFI = true;
2076 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2077 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2078 .addImm(Reg)
2079 .addImm(Offset)
2080 .setMIFlag(MachineInstr::FrameSetup);
2081 }
2082 }
2083 }
2084 }
2085
2086 if (NeedsWinCFI && HasWinCFI)
2087 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2088 .setMIFlag(MachineInstr::FrameSetup);
2089
2090 if (FnHasClrFunclet && !IsFunclet) {
2091 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2092 // immediately after the prolog) into the PSPSlot so that funclets
2093 // and the GC can recover it.
2094 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2095 auto PSPInfo = MachinePointerInfo::getFixedStack(
2096 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2097 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2098 PSPSlotOffset)
2099 .addReg(StackPtr)
2100 .addMemOperand(MF.getMachineMemOperand(
2101 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2102 SlotSize, Align(SlotSize)));
2103 }
2104
2105 // Realign stack after we spilled callee-saved registers (so that we'll be
2106 // able to calculate their offsets from the frame pointer).
2107 // Win64 requires aligning the stack after the prologue.
2108 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2109 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2110 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2111 }
2112
2113 // We already dealt with stack realignment and funclets above.
2114 if (IsFunclet && STI.is32Bit())
2115 return;
2116
2117 // If we need a base pointer, set it up here. It's whatever the value
2118 // of the stack pointer is at this point. Any variable size objects
2119 // will be allocated after this, so we can still use the base pointer
2120 // to reference locals.
2121 if (TRI->hasBasePointer(MF)) {
2122 // Update the base pointer with the current stack pointer.
2123 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2124 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2125 .addReg(SPOrEstablisher)
2126 .setMIFlag(MachineInstr::FrameSetup);
2127 if (X86FI->getRestoreBasePointer()) {
2128 // Stash value of base pointer. Saving RSP instead of EBP shortens
2129 // dependence chain. Used by SjLj EH.
2130 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2131 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
2132 FramePtr, true, X86FI->getRestoreBasePointerOffset())
2133 .addReg(SPOrEstablisher)
2134 .setMIFlag(MachineInstr::FrameSetup);
2135 }
2136
2137 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2138 // Stash the value of the frame pointer relative to the base pointer for
2139 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2140 // it recovers the frame pointer from the base pointer rather than the
2141 // other way around.
2142 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2143 Register UsedReg;
2144 int Offset =
2145 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2146 .getFixed();
2147 assert(UsedReg == BasePtr);
2148 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2149 .addReg(FramePtr)
2150 .setMIFlag(MachineInstr::FrameSetup);
2151 }
2152 }
2153 if (ArgBaseReg.isValid()) {
2154 // Save argument base pointer.
2155 auto *MI = X86FI->getStackPtrSaveMI();
2156 int FI = MI->getOperand(1).getIndex();
2157 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2158 // movl %basereg, offset(%ebp)
2159 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2160 .addReg(ArgBaseReg)
2161 .setMIFlag(MachineInstr::FrameSetup);
2162 }
2163
2164 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2165 // Mark end of stack pointer adjustment.
2166 if (!HasFP && NumBytes) {
2167 // Define the current CFA rule to use the provided offset.
2168 assert(StackSize);
2169 BuildCFI(
2170 MBB, MBBI, DL,
2171 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2172 MachineInstr::FrameSetup);
2173 }
2174
2175 // Emit DWARF info specifying the offsets of the callee-saved registers.
2176 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2177 }
2178
2179 // X86 Interrupt handling function cannot assume anything about the direction
2180 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2181 // in each prologue of interrupt handler function.
2182 //
2183 // FIXME: Create "cld" instruction only in these cases:
2184 // 1. The interrupt handling function uses any of the "rep" instructions.
2185 // 2. Interrupt handling function calls another function.
2186 //
2187 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR)
2188 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2189 .setMIFlag(MachineInstr::FrameSetup);
2190
2191 // At this point we know if the function has WinCFI or not.
2192 MF.setHasWinCFI(HasWinCFI);
2193}
2194
2195 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2196 const MachineFunction &MF) const {
2197 // We can't use LEA instructions for adjusting the stack pointer if we don't
2198 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2199 // to deallocate the stack.
2200 // This means that we can use LEA for SP in two situations:
2201 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2202 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2203 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2204}
2205
2206 static bool isFuncletReturnInstr(const MachineInstr &MI) {
2207 switch (MI.getOpcode()) {
2208 case X86::CATCHRET:
2209 case X86::CLEANUPRET:
2210 return true;
2211 default:
2212 return false;
2213 }
2214 llvm_unreachable("impossible");
2215}
2216
2217// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2218// stack. It holds a pointer to the bottom of the root function frame. The
2219// establisher frame pointer passed to a nested funclet may point to the
2220// (mostly empty) frame of its parent funclet, but it will need to find
2221// the frame of the root function to access locals. To facilitate this,
2222// every funclet copies the pointer to the bottom of the root function
2223// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2224// same offset for the PSPSym in the root function frame that's used in the
2225// funclets' frames allows each funclet to dynamically accept any ancestor
2226// frame as its establisher argument (the runtime doesn't guarantee the
2227// immediate parent for some reason lost to history), and also allows the GC,
2228// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2229// frame with only a single offset reported for the entire method.
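// Concretely: if the PSPSym lives at SP+N right after the root function's
// prologue, every funclet also reserves its slot at SP+N, loads the root frame
// pointer from [establisher + N], and stores it back at [SP + N] (see the
// CLR-funclet handling in emitPrologue and getWinEHFuncletFrameSize below).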
2230unsigned
2231X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2232 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2233 Register SPReg;
2234 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2235 /*IgnoreSPUpdates*/ true)
2236 .getFixed();
2237 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2238 return static_cast<unsigned>(Offset);
2239}
2240
2241unsigned
2242X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2243 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2244 // This is the size of the pushed CSRs.
2245 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2246 // This is the size of callee saved XMMs.
2247 const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2248 unsigned XMMSize = WinEHXMMSlotInfo.size() *
2249 TRI->getSpillSize(X86::VR128RegClass);
2250 // This is the amount of stack a funclet needs to allocate.
2251 unsigned UsedSize;
2252 EHPersonality Personality =
2253 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2254 if (Personality == EHPersonality::CoreCLR) {
2255 // CLR funclets need to hold enough space to include the PSPSym, at the
2256 // same offset from the stack pointer (immediately after the prolog) as it
2257 // resides at in the main function.
2258 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2259 } else {
2260 // Other funclets just need enough stack for outgoing call arguments.
2261 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2262 }
2263 // RBP is not included in the callee saved register block. After pushing RBP,
2264 // everything is 16 byte aligned. Everything we allocate before an outgoing
2265 // call must also be 16 byte aligned.
2266 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2267 // Subtract out the size of the callee saved registers. This is how much stack
2268 // each funclet will allocate.
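// For example, with 24 bytes of pushed CSRs, no XMM spills, and a 40-byte
// outgoing-call area: FrameSizeMinusRBP = alignTo(24 + 40, 16) = 64, so the
// funclet allocates 64 - 24 = 40 bytes.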
2269 return FrameSizeMinusRBP + XMMSize - CSSize;
2270}
2271
2272static bool isTailCallOpcode(unsigned Opc) {
2273 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2274 Opc == X86::TCRETURNmi ||
2275 Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 ||
2276 Opc == X86::TCRETURNmi64;
2277}
2278
2279 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2280 MachineBasicBlock &MBB) const {
2281 const MachineFrameInfo &MFI = MF.getFrameInfo();
2282 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2283 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2284 MachineBasicBlock::iterator MBBI = Terminator;
2285 DebugLoc DL;
2286 if (MBBI != MBB.end())
2287 DL = MBBI->getDebugLoc();
2288 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2289 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2290 Register FramePtr = TRI->getFrameRegister(MF);
2291 Register MachineFramePtr =
2292 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2293
2294 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2295 bool NeedsWin64CFI =
2296 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2297 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2298
2299 // Get the number of bytes to allocate from the FrameInfo.
2300 uint64_t StackSize = MFI.getStackSize();
2301 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2302 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2303 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2304 bool HasFP = hasFP(MF);
2305 uint64_t NumBytes = 0;
2306
2307 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2308 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2309 MF.needsFrameMoves();
2310
2311 Register ArgBaseReg;
2312 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2313 unsigned Opc = X86::LEA32r;
2314 Register StackReg = X86::ESP;
2315 ArgBaseReg = MI->getOperand(0).getReg();
2316 if (STI.is64Bit()) {
2317 Opc = X86::LEA64r;
2318 StackReg = X86::RSP;
2319 }
2320 // leal -4(%basereg), %esp
2321 // .cfi_def_cfa %esp, 4
2322 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2323 .addUse(ArgBaseReg)
2324 .addImm(1)
2325 .addUse(X86::NoRegister)
2326 .addImm(-(int64_t)SlotSize)
2327 .addUse(X86::NoRegister)
2328 .setMIFlag(MachineInstr::FrameDestroy);
2329 if (NeedsDwarfCFI) {
2330 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2331 BuildCFI(MBB, MBBI, DL,
2332 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2333 MachineInstr::FrameDestroy);
2334 --MBBI;
2335 }
2336 --MBBI;
2337 }
2338
2339 if (IsFunclet) {
2340 assert(HasFP && "EH funclets without FP not yet implemented");
2341 NumBytes = getWinEHFuncletFrameSize(MF);
2342 } else if (HasFP) {
2343 // Calculate required stack adjustment.
2344 uint64_t FrameSize = StackSize - SlotSize;
2345 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2346
2347 // Callee-saved registers were pushed on stack before the stack was
2348 // realigned.
2349 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2350 NumBytes = alignTo(FrameSize, MaxAlign);
2351 } else {
2352 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2353 }
2354 uint64_t SEHStackAllocAmt = NumBytes;
2355
2356 // AfterPop is the position to insert .cfi_restore.
2357 MachineBasicBlock::iterator AfterPop = MBBI;
2358 if (HasFP) {
2359 if (X86FI->hasSwiftAsyncContext()) {
2360 // Discard the context.
2361 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2362 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
2363 }
2364 // Pop EBP.
2365 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
2366 MachineFramePtr)
2367 .setMIFlag(MachineInstr::FrameDestroy);
2368
2369 // We need to reset FP to its untagged state on return. Bit 60 is currently
2370 // used to show the presence of an extended frame.
2371 if (X86FI->hasSwiftAsyncContext()) {
2372 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
2373 MachineFramePtr)
2374 .addUse(MachineFramePtr)
2375 .addImm(60)
2376 .setMIFlag(MachineInstr::FrameDestroy);
2377 }
2378
2379 if (NeedsDwarfCFI) {
2380 if (!ArgBaseReg.isValid()) {
2381 unsigned DwarfStackPtr =
2382 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2383 BuildCFI(MBB, MBBI, DL,
2384 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2385 MachineInstr::FrameDestroy);
2386 }
2387 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2388 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2389 BuildCFI(MBB, AfterPop, DL,
2390 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2391 MachineInstr::FrameDestroy);
2392 --MBBI;
2393 --AfterPop;
2394 }
2395 --MBBI;
2396 }
2397 }
2398
2399 MachineBasicBlock::iterator FirstCSPop = MBBI;
2400 // Skip the callee-saved pop instructions.
2401 while (MBBI != MBB.begin()) {
2402 MachineBasicBlock::iterator PI = std::prev(MBBI);
2403 unsigned Opc = PI->getOpcode();
2404
2405 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2406 if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
2407 (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
2408 (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
2409 (Opc != X86::ADD64ri32 || !PI->getFlag(MachineInstr::FrameDestroy)))
2410 break;
2411 FirstCSPop = PI;
2412 }
2413
2414 --MBBI;
2415 }
2416 if (ArgBaseReg.isValid()) {
2417 // Restore argument base pointer.
2418 auto *MI = X86FI->getStackPtrSaveMI();
2419 int FI = MI->getOperand(1).getIndex();
2420 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2421 // movl offset(%ebp), %basereg
2422 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2423 .setMIFlag(MachineInstr::FrameDestroy);
2424 }
2425 MBBI = FirstCSPop;
2426
2427 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2428 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2429
2430 if (MBBI != MBB.end())
2431 DL = MBBI->getDebugLoc();
2432 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2433 // instruction, merge the two instructions.
2434 if (NumBytes || MFI.hasVarSizedObjects())
2435 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2436
2437 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2438 // slot before popping them off! Same applies for the case, when stack was
2439 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2440 // will not do realignment or dynamic stack allocation.
2441 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2442 !IsFunclet) {
2443 if (TRI->hasStackRealignment(MF))
2444 MBBI = FirstCSPop;
2445 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2446 uint64_t LEAAmount =
2447 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2448
2449 if (X86FI->hasSwiftAsyncContext())
2450 LEAAmount -= 16;
2451
2452 // There are only two legal forms of epilogue:
2453 // - add SEHAllocationSize, %rsp
2454 // - lea SEHAllocationSize(%FramePtr), %rsp
2455 //
2456 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2457 // However, we may use this sequence if we have a frame pointer because the
2458 // effects of the prologue can safely be undone.
2459 if (LEAAmount != 0) {
2460 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2461 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
2462 FramePtr, false, LEAAmount);
2463 --MBBI;
2464 } else {
2465 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2466 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
2467 .addReg(FramePtr);
2468 --MBBI;
2469 }
2470 } else if (NumBytes) {
2471 // Adjust stack pointer back: ESP += numbytes.
2472 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2473 if (!HasFP && NeedsDwarfCFI) {
2474 // Define the current CFA rule to use the provided offset.
2475 BuildCFI(MBB, MBBI, DL,
2476 MCCFIInstruction::cfiDefCfaOffset(
2477 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2478 MachineInstr::FrameDestroy);
2479 }
2480 --MBBI;
2481 }
2482
2483 // Windows unwinder will not invoke function's exception handler if IP is
2484 // either in prologue or in epilogue. This behavior causes a problem when a
2485 // call immediately precedes an epilogue, because the return address points
2486 // into the epilogue. To cope with that, we insert an epilogue marker here,
2487 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2488 // final emitted code.
2489 if (NeedsWin64CFI && MF.hasWinCFI())
2490 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2491
2492 if (!HasFP && NeedsDwarfCFI) {
2493 MBBI = FirstCSPop;
2494 int64_t Offset = -CSSize - SlotSize;
2495 // Mark callee-saved pop instruction.
2496 // Define the current CFA rule to use the provided offset.
2497 while (MBBI != MBB.end()) {
2498 MachineBasicBlock::iterator PI = MBBI;
2499 unsigned Opc = PI->getOpcode();
2500 ++MBBI;
2501 if (Opc == X86::POP32r || Opc == X86::POP64r) {
2502 Offset += SlotSize;
2503 BuildCFI(MBB, MBBI, DL,
2504 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2505 MachineInstr::FrameDestroy);
2506 }
2507 }
2508 }
2509
2510 // Emit DWARF info specifying the restores of the callee-saved registers.
2511 // For an epilogue ending in a return, or any other block without successors,
2512 // there is no need to generate .cfi_restore for callee-saved registers.
2513 if (NeedsDwarfCFI && !MBB.succ_empty())
2514 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2515
2516 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2517 // Add the return addr area delta back since we are not tail calling.
2518 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2519 assert(Offset >= 0 && "TCDelta should never be positive");
2520 if (Offset) {
2521 // Check for possible merge with preceding ADD instruction.
2522 Offset += mergeSPUpdates(MBB, Terminator, true);
2523 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2524 }
2525 }
2526
2527 // Emit tilerelease for AMX kernel.
2528 if (X86FI->hasVirtualTileReg())
2529 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2530}
2531
2532 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2533 int FI,
2534 Register &FrameReg) const {
2535 const MachineFrameInfo &MFI = MF.getFrameInfo();
2536
2537 bool IsFixed = MFI.isFixedObjectIndex(FI);
2538 // We can't calculate offset from frame pointer if the stack is realigned,
2539 // so enforce usage of stack/base pointer. The base pointer is used when we
2540 // have dynamic allocas in addition to dynamic realignment.
2541 if (TRI->hasBasePointer(MF))
2542 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2543 else if (TRI->hasStackRealignment(MF))
2544 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2545 else
2546 FrameReg = TRI->getFrameRegister(MF);
2547
2548 // Offset will hold the offset from the stack pointer at function entry to the
2549 // object.
2550 // We need to factor in additional offsets applied during the prologue to the
2551 // frame, base, and stack pointer depending on which is used.
2552 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2553 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2554 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2555 uint64_t StackSize = MFI.getStackSize();
2556 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2557 int64_t FPDelta = 0;
2558
2559 // In an x86 interrupt, remove the offset we added to account for the return
2560 // address from any stack object allocated in the caller's frame. Interrupts
2561 // do not have a standard return address. Fixed objects in the current frame,
2562 // such as SSE register spills, should not get this treatment.
2563 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2564 Offset >= 0) {
2565 Offset += getOffsetOfLocalArea();
2566 }
2567
2568 if (IsWin64Prologue) {
2569 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2570
2571 // Calculate required stack adjustment.
2572 uint64_t FrameSize = StackSize - SlotSize;
2573 // If required, include space for extra hidden slot for stashing base pointer.
2574 if (X86FI->getRestoreBasePointer())
2575 FrameSize += SlotSize;
2576 uint64_t NumBytes = FrameSize - CSSize;
2577
2578 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2579 if (FI && FI == X86FI->getFAIndex())
2580 return StackOffset::getFixed(-SEHFrameOffset);
2581
2582 // FPDelta is the offset from the "traditional" FP location of the old base
2583 // pointer followed by return address and the location required by the
2584 // restricted Win64 prologue.
2585 // Add FPDelta to all offsets below that go through the frame pointer.
2586 FPDelta = FrameSize - SEHFrameOffset;
2587 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2588 "FPDelta isn't aligned per the Win64 ABI!");
2589 }
2590
2591 if (FrameReg == TRI->getFramePtr()) {
2592 // Skip saved EBP/RBP
2593 Offset += SlotSize;
2594
2595 // Account for restricted Windows prologue.
2596 Offset += FPDelta;
2597
2598 // Skip the RETADDR move area
2599 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2600 if (TailCallReturnAddrDelta < 0)
2601 Offset -= TailCallReturnAddrDelta;
2602
2603 return StackOffset::getFixed(Offset);
2604 }
2605
2606 // FrameReg is either the stack pointer or a base pointer. But the base is
2607 // located at the end of the statically known StackSize so the distinction
2608 // doesn't really matter.
2609 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2610 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2611 return StackOffset::getFixed(Offset + StackSize);
2612}
2613
2614 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2615 Register &FrameReg) const {
2616 const MachineFrameInfo &MFI = MF.getFrameInfo();
2617 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2618 const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2619 const auto it = WinEHXMMSlotInfo.find(FI);
2620
2621 if (it == WinEHXMMSlotInfo.end())
2622 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2623
2624 FrameReg = TRI->getStackRegister();
2625 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2626 it->second;
2627}
2628
2629 StackOffset
2630 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2631 Register &FrameReg,
2632 int Adjustment) const {
2633 const MachineFrameInfo &MFI = MF.getFrameInfo();
2634 FrameReg = TRI->getStackRegister();
2635 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2636 getOffsetOfLocalArea() + Adjustment);
2637}
2638
2639 StackOffset
2640 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2641 int FI, Register &FrameReg,
2642 bool IgnoreSPUpdates) const {
2643
2644 const MachineFrameInfo &MFI = MF.getFrameInfo();
2645 // Does not include any dynamic realign.
2646 const uint64_t StackSize = MFI.getStackSize();
2647 // LLVM arranges the stack as follows:
2648 // ...
2649 // ARG2
2650 // ARG1
2651 // RETADDR
2652 // PUSH RBP <-- RBP points here
2653 // PUSH CSRs
2654 // ~~~~~~~ <-- possible stack realignment (non-win64)
2655 // ...
2656 // STACK OBJECTS
2657 // ... <-- RSP after prologue points here
2658 // ~~~~~~~ <-- possible stack realignment (win64)
2659 //
2660 // if (hasVarSizedObjects()):
2661 // ... <-- "base pointer" (ESI/RBX) points here
2662 // DYNAMIC ALLOCAS
2663 // ... <-- RSP points here
2664 //
2665 // Case 1: In the simple case of no stack realignment and no dynamic
2666 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2667 // with fixed offsets from RSP.
2668 //
2669 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2670 // stack objects are addressed with RBP and regular stack objects with RSP.
2671 //
2672 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2673 // to address stack arguments for outgoing calls and nothing else. The "base
2674 // pointer" points to local variables, and RBP points to fixed objects.
2675 //
2676 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2677 // answer we give is relative to the SP after the prologue, and not the
2678 // SP in the middle of the function.
2679
2680 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2681 !STI.isTargetWin64())
2682 return getFrameIndexReference(MF, FI, FrameReg);
2683
2684 // If !hasReservedCallFrame the function might have SP adjustment in the
2685 // body. So, even though the offset is statically known, it depends on where
2686 // we are in the function.
2687 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2688 return getFrameIndexReference(MF, FI, FrameReg);
2689
2690 // We don't handle tail calls, and shouldn't be seeing them either.
2691 assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2692 "we don't handle this case!");
2693
2694 // This is how the math works out:
2695 //
2696 // %rsp grows (i.e. gets lower) left to right. Each box below is
2697 // one word (eight bytes). Obj0 is the stack slot we're trying to
2698 // get to.
2699 //
2700 // ----------------------------------
2701 // | BP | Obj0 | Obj1 | ... | ObjN |
2702 // ----------------------------------
2703 // ^ ^ ^ ^
2704 // A B C E
2705 //
2706 // A is the incoming stack pointer.
2707 // (B - A) is the local area offset (-8 for x86-64) [1]
2708 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2709 //
2710 // |(E - B)| is the StackSize (absolute value, positive). For a
2711 // stack that grows down, this works out to be (B - E). [3]
2712 //
2713 // E is also the value of %rsp after stack has been set up, and we
2714 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2715 // (C - E) == (C - A) - (B - A) + (B - E)
2716 // { Using [1], [2] and [3] above }
2717 // == getObjectOffset - LocalAreaOffset + StackSize
2718
2719 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2720}
2721
2722 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2723 MachineFunction &MF, const TargetRegisterInfo *TRI,
2724 std::vector<CalleeSavedInfo> &CSI) const {
2725 MachineFrameInfo &MFI = MF.getFrameInfo();
2726 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2727
2728 unsigned CalleeSavedFrameSize = 0;
2729 unsigned XMMCalleeSavedFrameSize = 0;
2730 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2731 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2732
2733 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2734
2735 if (TailCallReturnAddrDelta < 0) {
2736 // create RETURNADDR area
2737 // arg
2738 // arg
2739 // RETADDR
2740 // { ...
2741 // RETADDR area
2742 // ...
2743 // }
2744 // [EBP]
2745 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2746 TailCallReturnAddrDelta - SlotSize, true);
2747 }
2748
2749 // Spill the BasePtr if it's used.
2750 if (this->TRI->hasBasePointer(MF)) {
2751 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2752 if (MF.hasEHFunclets()) {
2753 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2754 X86FI->setHasSEHFramePtrSave(true);
2755 X86FI->setSEHFramePtrSaveIndex(FI);
2756 }
2757 }
2758
2759 if (hasFP(MF)) {
2760 // emitPrologue always spills frame register the first thing.
2761 SpillSlotOffset -= SlotSize;
2762 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2763
2764 // The async context lives directly before the frame pointer, and we
2765 // allocate a second slot to preserve stack alignment.
2766 if (X86FI->hasSwiftAsyncContext()) {
2767 SpillSlotOffset -= SlotSize;
2768 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2769 SpillSlotOffset -= SlotSize;
2770 }
2771
2772 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2773 // the frame register, we can delete it from CSI list and not have to worry
2774 // about avoiding it later.
2775 Register FPReg = TRI->getFrameRegister(MF);
2776 for (unsigned i = 0; i < CSI.size(); ++i) {
2777 if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
2778 CSI.erase(CSI.begin() + i);
2779 break;
2780 }
2781 }
2782 }
2783
2784 // Assign slots for GPRs. It increases frame size.
2785 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2786 Register Reg = I.getReg();
2787
2788 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2789 continue;
2790
2791 SpillSlotOffset -= SlotSize;
2792 CalleeSavedFrameSize += SlotSize;
2793
2794 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2795 I.setFrameIdx(SlotIndex);
2796 }
2797
2798 // Adjust the offset of the spill slot now that the exact callee-saved frame
2799 // size is known.
2800 if (X86FI->getRestoreBasePointer()) {
2801 SpillSlotOffset -= SlotSize;
2802 CalleeSavedFrameSize += SlotSize;
2803
2804 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2805 // TODO: would saving the slot index be better?
2806 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2807 }
2808 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2809 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2810
2811 // Assign slots for XMMs.
2812 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2813 Register Reg = I.getReg();
2814 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2815 continue;
2816
2817 // If this is k-register make sure we lookup via the largest legal type.
2818 MVT VT = MVT::Other;
2819 if (X86::VK16RegClass.contains(Reg))
2820 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2821
2822 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2823 unsigned Size = TRI->getSpillSize(*RC);
2824 Align Alignment = TRI->getSpillAlign(*RC);
2825 // ensure alignment
2826 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2827 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2828
2829 // spill into slot
2830 SpillSlotOffset -= Size;
2831 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2832 I.setFrameIdx(SlotIndex);
2833 MFI.ensureMaxAlignment(Alignment);
2834
2835 // Save the start offset and size of XMM in stack frame for funclets.
2836 if (X86::VR128RegClass.contains(Reg)) {
2837 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2838 XMMCalleeSavedFrameSize += Size;
2839 }
2840 }
2841
2842 return true;
2843}
2844
2845 bool X86FrameLowering::spillCalleeSavedRegisters(
2846 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2847 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2848 DebugLoc DL = MBB.findDebugLoc(MI);
2849
2850 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2851 // for us, and there are no XMM CSRs on Win32.
2852 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2853 return true;
2854
2855 // Push GPRs. It increases frame size.
2856 const MachineFunction &MF = *MBB.getParent();
2857 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
2858 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
2859 Register Reg = I.getReg();
2860
2861 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2862 continue;
2863
2864 const MachineRegisterInfo &MRI = MF.getRegInfo();
2865 bool isLiveIn = MRI.isLiveIn(Reg);
2866 if (!isLiveIn)
2867 MBB.addLiveIn(Reg);
2868
2869 // Decide whether we can add a kill flag to the use.
2870 bool CanKill = !isLiveIn;
2871 // Check if any subregister is live-in
2872 if (CanKill) {
2873 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) {
2874 if (MRI.isLiveIn(*AReg)) {
2875 CanKill = false;
2876 break;
2877 }
2878 }
2879 }
2880
2881 // Do not set a kill flag on values that are also marked as live-in. This
2882 // happens with the @llvm.returnaddress intrinsic and with arguments
2883 // passed in callee saved registers.
2884 // Omitting the kill flags is conservatively correct even if the live-in
2885 // is not used after all.
2886 BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill))
2887 .setMIFlag(MachineInstr::FrameSetup);
2888 }
2889
2890 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2891 if (X86FI->getRestoreBasePointer()) {
2892 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
2893 Register BaseReg = this->TRI->getBaseRegister();
2894 BuildMI(MBB, MI, DL, TII.get(Opc))
2895 .addReg(BaseReg, getKillRegState(true))
2896 .setMIFlag(MachineInstr::FrameSetup);
2897 }
2898
2899 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
2900 // It can be done by spilling XMMs to stack frame.
2901 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
2902 Register Reg = I.getReg();
2903 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2904 continue;
2905
2906 // If this is k-register make sure we lookup via the largest legal type.
2907 MVT VT = MVT::Other;
2908 if (X86::VK16RegClass.contains(Reg))
2909 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2910
2911 // Add the callee-saved register as live-in. It's killed at the spill.
2912 MBB.addLiveIn(Reg);
2913 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2914
2915 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
2916 Register());
2917 --MI;
2918 MI->setFlag(MachineInstr::FrameSetup);
2919 ++MI;
2920 }
2921
2922 return true;
2923}
2924
2925void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
2926 MachineBasicBlock::iterator MBBI,
2927 MachineInstr *CatchRet) const {
2928 // SEH shouldn't use catchret.
2929 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2930 MBB.getParent()->getFunction().getPersonalityFn())) &&
2931 "SEH should not use CATCHRET");
2932 const DebugLoc &DL = CatchRet->getDebugLoc();
2933 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
2934
2935 // Fill EAX/RAX with the address of the target block.
2936 if (STI.is64Bit()) {
2937 // LEA64r CatchRetTarget(%rip), %rax
2938 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
2939 .addReg(X86::RIP)
2940 .addImm(0)
2941 .addReg(0)
2942 .addMBB(CatchRetTarget)
2943 .addReg(0);
2944 } else {
2945 // MOV32ri $CatchRetTarget, %eax
2946 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2947 .addMBB(CatchRetTarget);
2948 }
2949
2950 // Record that we've taken the address of CatchRetTarget and no longer just
2951 // reference it in a terminator.
2952 CatchRetTarget->setMachineBlockAddressTaken();
2953}
2954
2955 bool X86FrameLowering::restoreCalleeSavedRegisters(
2956 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2957 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2958 if (CSI.empty())
2959 return false;
2960
2961 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
2962 // Don't restore CSRs in 32-bit EH funclets. Matches
2963 // spillCalleeSavedRegisters.
2964 if (STI.is32Bit())
2965 return true;
2966 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
2967 // funclets. emitEpilogue transforms these to normal jumps.
2968 if (MI->getOpcode() == X86::CATCHRET) {
2969 const Function &F = MBB.getParent()->getFunction();
2970 bool IsSEH = isAsynchronousEHPersonality(
2971 classifyEHPersonality(F.getPersonalityFn()));
2972 if (IsSEH)
2973 return true;
2974 }
2975 }
2976
2977 DebugLoc DL = MBB.findDebugLoc(MI);
2978
2979 // Reload XMMs from stack frame.
2980 for (const CalleeSavedInfo &I : CSI) {
2981 Register Reg = I.getReg();
2982 if (X86::GR64RegClass.contains(Reg) ||
2983 X86::GR32RegClass.contains(Reg))
2984 continue;
2985
2986 // If this is k-register make sure we lookup via the largest legal type.
2987 MVT VT = MVT::Other;
2988 if (X86::VK16RegClass.contains(Reg))
2989 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2990
2991 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2992 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
2993 Register());
2994 }
2995
2996 // Clear the stack slot for spill base pointer register.
2997 MachineFunction &MF = *MBB.getParent();
2998 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2999 if (X86FI->getRestoreBasePointer()) {
3000 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3001 Register BaseReg = this->TRI->getBaseRegister();
3002 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3003 .setMIFlag(MachineInstr::FrameDestroy);
3004 }
3005
3006 // POP GPRs.
3007 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3008 for (const CalleeSavedInfo &I : CSI) {
3009 Register Reg = I.getReg();
3010 if (!X86::GR64RegClass.contains(Reg) &&
3011 !X86::GR32RegClass.contains(Reg))
3012 continue;
3013
3014 BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
3015 .setMIFlag(MachineInstr::FrameDestroy);
3016 }
3017 return true;
3018}
3019
3020 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3021 BitVector &SavedRegs,
3022 RegScavenger *RS) const {
3023 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3024
3025 // Spill the BasePtr if it's used.
3026 if (TRI->hasBasePointer(MF)){
3027 Register BasePtr = TRI->getBaseRegister();
3028 if (STI.isTarget64BitILP32())
3029 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3030 SavedRegs.set(BasePtr);
3031 }
3032}
3033
3034static bool
3035 HasNestArgument(const MachineFunction *MF) {
3036 const Function &F = MF->getFunction();
3037 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
3038 I != E; I++) {
3039 if (I->hasNestAttr() && !I->use_empty())
3040 return true;
3041 }
3042 return false;
3043}
3044
3045/// GetScratchRegister - Get a temp register for performing work in the
3046/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3047/// and the properties of the function either one or two registers will be
3048/// needed. Set primary to true for the first register, false for the second.
3049static unsigned
3050GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
3051 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3052
3053 // Erlang stuff.
3054 if (CallingConvention == CallingConv::HiPE) {
3055 if (Is64Bit)
3056 return Primary ? X86::R14 : X86::R13;
3057 else
3058 return Primary ? X86::EBX : X86::EDI;
3059 }
3060
3061 if (Is64Bit) {
3062 if (IsLP64)
3063 return Primary ? X86::R11 : X86::R12;
3064 else
3065 return Primary ? X86::R11D : X86::R12D;
3066 }
3067
3068 bool IsNested = HasNestArgument(&MF);
3069
3070 if (CallingConvention == CallingConv::X86_FastCall ||
3071 CallingConvention == CallingConv::Fast ||
3072 CallingConvention == CallingConv::Tail) {
3073 if (IsNested)
3074 report_fatal_error("Segmented stacks does not support fastcall with "
3075 "nested function.");
3076 return Primary ? X86::EAX : X86::ECX;
3077 }
3078 if (IsNested)
3079 return Primary ? X86::EDX : X86::EAX;
3080 return Primary ? X86::ECX : X86::EAX;
3081}
3082
3083// The stack limit in the TCB is set to this many bytes above the actual stack
3084// limit.
3085 static const uint64_t kSplitStackAvailable = 256;
3086
3087 void X86FrameLowering::adjustForSegmentedStacks(
3088 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3089 MachineFrameInfo &MFI = MF.getFrameInfo();
3090 uint64_t StackSize;
3091 unsigned TlsReg, TlsOffset;
3092 DebugLoc DL;
3093
3094 // To support shrink-wrapping we would need to insert the new blocks
3095 // at the right place and update the branches to PrologueMBB.
3096 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3097
3098 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3099 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3100 "Scratch register is live-in");
3101
3102 if (MF.getFunction().isVarArg())
3103 report_fatal_error("Segmented stacks do not support vararg functions.");
3104 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3105 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3106 !STI.isTargetDragonFly())
3107 report_fatal_error("Segmented stacks not supported on this platform.");
3108
3109 // Eventually StackSize will be calculated by a link-time pass; which will
3110 // also decide whether checking code needs to be injected into this particular
3111 // prologue.
3112 StackSize = MFI.getStackSize();
3113
3114 if (!MFI.needsSplitStackProlog())
3115 return;
3116
3117 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3118 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3119 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3120 bool IsNested = false;
3121
3122 // We need to know if the function has a nest argument only in 64 bit mode.
3123 if (Is64Bit)
3124 IsNested = HasNestArgument(&MF);
3125
3126 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3127 // allocMBB needs to be last (terminating) instruction.
3128
3129 for (const auto &LI : PrologueMBB.liveins()) {
3130 allocMBB->addLiveIn(LI);
3131 checkMBB->addLiveIn(LI);
3132 }
3133
3134 if (IsNested)
3135 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3136
3137 MF.push_front(allocMBB);
3138 MF.push_front(checkMBB);
3139
3140 // When the frame size is less than 256 we just compare the stack
3141 // boundary directly to the value of the stack pointer, per gcc.
3142 bool CompareStackPointer = StackSize < kSplitStackAvailable;
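// On Linux x86-64, for instance, a small frame is checked with a single
//   cmp %fs:0x70, %rsp
// while a larger frame first computes rsp - StackSize into the scratch register:
//   lea -StackSize(%rsp), %r11 ; cmp %fs:0x70, %r11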
3143
3144 // Read the limit off the current stacklet off the stack_guard location.
3145 if (Is64Bit) {
3146 if (STI.isTargetLinux()) {
3147 TlsReg = X86::FS;
3148 TlsOffset = IsLP64 ? 0x70 : 0x40;
3149 } else if (STI.isTargetDarwin()) {
3150 TlsReg = X86::GS;
3151 TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
3152 } else if (STI.isTargetWin64()) {
3153 TlsReg = X86::GS;
3154 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3155 } else if (STI.isTargetFreeBSD()) {
3156 TlsReg = X86::FS;
3157 TlsOffset = 0x18;
3158 } else if (STI.isTargetDragonFly()) {
3159 TlsReg = X86::FS;
3160 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3161 } else {
3162 report_fatal_error("Segmented stacks not supported on this platform.");
3163 }
3164
3165 if (CompareStackPointer)
3166 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3167 else
3168 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
3169 .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
3170
3171 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
3172 .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
3173 } else {
3174 if (STI.isTargetLinux()) {
3175 TlsReg = X86::GS;
3176 TlsOffset = 0x30;
3177 } else if (STI.isTargetDarwin()) {
3178 TlsReg = X86::GS;
3179 TlsOffset = 0x48 + 90*4;
3180 } else if (STI.isTargetWin32()) {
3181 TlsReg = X86::FS;
3182 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3183 } else if (STI.isTargetDragonFly()) {
3184 TlsReg = X86::FS;
3185 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3186 } else if (STI.isTargetFreeBSD()) {
3187 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3188 } else {
3189 report_fatal_error("Segmented stacks not supported on this platform.");
3190 }
3191
3192 if (CompareStackPointer)
3193 ScratchReg = X86::ESP;
3194 else
3195 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
3196 .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
3197
3198 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3199 STI.isTargetDragonFly()) {
3200 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
3201 .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
3202 } else if (STI.isTargetDarwin()) {
3203
3204 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3205 unsigned ScratchReg2;
3206 bool SaveScratch2;
3207 if (CompareStackPointer) {
3208 // The primary scratch register is available for holding the TLS offset.
3209 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3210 SaveScratch2 = false;
3211 } else {
3212 // Need to use a second register to hold the TLS offset
3213 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3214
3215 // Unfortunately, with fastcc the second scratch register may hold an
3216 // argument.
3217 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3218 }
3219
3220 // If Scratch2 is live-in then it needs to be saved.
3221 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3222 "Scratch register is live-in and not saved");
3223
3224 if (SaveScratch2)
3225 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3226 .addReg(ScratchReg2, RegState::Kill);
3227
3228 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3229 .addImm(TlsOffset);
3230 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3231 .addReg(ScratchReg)
3232 .addReg(ScratchReg2).addImm(1).addReg(0)
3233 .addImm(0)
3234 .addReg(TlsReg);
3235
3236 if (SaveScratch2)
3237 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3238 }
3239 }
3240
3241 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3242 // It jumps to normal execution of the function body.
3243 BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);
3244
3245 // On 32 bit we first push the arguments size and then the frame size. On 64
3246 // bit, we pass the stack frame size in r10 and the argument size in r11.
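// e.g. 64-bit: mov $StackSize, %r10 ; mov $ArgSize, %r11 ; call __morestack
//      32-bit: push $ArgSize ; push $StackSize ; call __morestack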
3247 if (Is64Bit) {
3248 // Functions with nested arguments use R10, so it needs to be saved across
3249 // the call to _morestack
3250
3251 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3252 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3253 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3254 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3255
3256 if (IsNested)
3257 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3258
3259 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3260 .addImm(StackSize);
3261 BuildMI(allocMBB, DL,
3262 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3263 Reg11)
3264 .addImm(X86FI->getArgumentStackSize());
3265 } else {
3266 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3267 .addImm(X86FI->getArgumentStackSize());
3268 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3269 .addImm(StackSize);
3270 }
3271
3272 // __morestack is in libgcc
3273 if (MF.getTarget().getCodeModel() == CodeModel::Large) {
3274 // Under the large code model, we cannot assume that __morestack lives
3275 // within 2^31 bytes of the call site, so we cannot use pc-relative
3276 // addressing. We cannot perform the call via a temporary register,
3277 // as the rax register may be used to store the static chain, and all
3278 // other suitable registers may be either callee-save or used for
3279 // parameter passing. We cannot use the stack at this point either
3280 // because __morestack manipulates the stack directly.
3281 //
3282 // To avoid these issues, perform an indirect call via a read-only memory
3283 // location containing the address.
3284 //
3285 // This solution is not perfect, as it assumes that the .rodata section
3286 // is laid out within 2^31 bytes of each function body, but this seems
3287 // to be sufficient for JIT.
3288 // FIXME: Add retpoline support and remove the error here..
3289 if (STI.useIndirectThunkCalls())
3290 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3291 "code model and thunks not yet implemented.");
3292 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3293 .addReg(X86::RIP)
3294 .addImm(0)
3295 .addReg(0)
3296 .addExternalSymbol("__morestack_addr")
3297 .addReg(0);
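// i.e. an indirect call through a read-only slot: call *__morestack_addr(%rip)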
3298 } else {
3299 if (Is64Bit)
3300 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3301 .addExternalSymbol("__morestack");
3302 else
3303 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3304 .addExternalSymbol("__morestack");
3305 }
3306
3307 if (IsNested)
3308 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3309 else
3310 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3311
3312 allocMBB->addSuccessor(&PrologueMBB);
3313
3314 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3315 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3316
3317#ifdef EXPENSIVE_CHECKS
3318 MF.verify();
3319#endif
3320}
3321
3322/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3323/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3324/// to fields it needs, through a named metadata node "hipe.literals" containing
3325/// name-value pairs.
3326static unsigned getHiPELiteral(
3327 NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) {
3328 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3329 MDNode *Node = HiPELiteralsMD->getOperand(i);
3330 if (Node->getNumOperands() != 2) continue;
3331 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3332 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3333 if (!NodeName || !NodeVal) continue;
3334 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3335 if (ValConst && NodeName->getString() == LiteralName) {
3336 return ValConst->getZExtValue();
3337 }
3338 }
3339
3340 report_fatal_error("HiPE literal " + LiteralName
3341 + " required but not provided");
3342}
3343
3344// Return true if there are no non-ehpad successors to MBB and there are no
3345// non-meta instructions between MBBI and MBB.end().
3346 static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3347 MachineBasicBlock::const_iterator MBBI) {
3348 return llvm::all_of(
3349 MBB.successors(),
3350 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3351 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3352 return MI.isMetaInstruction();
3353 });
3354}
3355
3356/// Erlang programs may need a special prologue to handle the stack size they
3357/// might need at runtime. That is because Erlang/OTP does not implement a C
3358/// stack but uses a custom implementation of hybrid stack/heap architecture.
3359/// (for more information see Eric Stenman's Ph.D. thesis:
3360/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3361///
3362/// CheckStack:
3363/// temp0 = sp - MaxStack
3364/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3365/// OldStart:
3366/// ...
3367/// IncStack:
3368/// call inc_stack # doubles the stack space
3369/// temp0 = sp - MaxStack
3370/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3371 void X86FrameLowering::adjustForHiPEPrologue(
3372 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3373 MachineFrameInfo &MFI = MF.getFrameInfo();
3374 DebugLoc DL;
3375
3376 // To support shrink-wrapping we would need to insert the new blocks
3377 // at the right place and update the branches to PrologueMBB.
3378 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3379
3380 // HiPE-specific values
3381 NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule()
3382 ->getNamedMetadata("hipe.literals");
3383 if (!HiPELiteralsMD)
3384 report_fatal_error(
3385 "Can't generate HiPE prologue without runtime parameters");
3386 const unsigned HipeLeafWords
3387 = getHiPELiteral(HiPELiteralsMD,
3388 Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3389 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3390 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3391 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ?
3392 MF.getFunction().arg_size() - CCRegisteredArgs : 0;
3393 unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize;
3394
3395 assert(STI.isTargetLinux() &&
3396 "HiPE prologue is only supported on Linux operating systems.");
3397
3398 // Compute the largest caller's frame that is needed to fit the callees'
3399 // frames. This 'MaxStack' is computed from:
3400 //
3401 // a) the fixed frame size, which is the space needed for all spilled temps,
3402 // b) outgoing on-stack parameter areas, and
3403 // c) the minimum stack space this function needs to make available for the
3404 // functions it calls (a tunable ABI property).
3405 if (MFI.hasCalls()) {
3406 unsigned MoreStackForCalls = 0;
3407
3408 for (auto &MBB : MF) {
3409 for (auto &MI : MBB) {
3410 if (!MI.isCall())
3411 continue;
3412
3413 // Get callee operand.
3414 const MachineOperand &MO = MI.getOperand(0);
3415
3416 // Only take account of global function calls (no closures etc.).
3417 if (!MO.isGlobal())
3418 continue;
3419
3420 const Function *F = dyn_cast<Function>(MO.getGlobal());
3421 if (!F)
3422 continue;
3423
3424 // Do not update 'MaxStack' for primitive and built-in functions
3425 // (encoded with names either starting with "erlang."/"bif_" or not
3426 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3427 // "_", such as the BIF "suspend_0") as they are executed on another
3428 // stack.
3429 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3430 F->getName().find_first_of("._") == StringRef::npos)
3431 continue;
3432
3433 unsigned CalleeStkArity =
3434 F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
3435 if (HipeLeafWords - 1 > CalleeStkArity)
3436 MoreStackForCalls = std::max(MoreStackForCalls,
3437 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3438 }
3439 }
3440 MaxStack += MoreStackForCalls;
3441 }
3442
3443 // If the stack frame needed is larger than the guaranteed amount, then runtime
3444 // checks and calls to the "inc_stack_0" BIF should be inserted in the assembly prologue.
3445 if (MaxStack > Guaranteed) {
3446 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3447 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3448
3449 for (const auto &LI : PrologueMBB.liveins()) {
3450 stackCheckMBB->addLiveIn(LI);
3451 incStackMBB->addLiveIn(LI);
3452 }
3453
3454 MF.push_front(incStackMBB);
3455 MF.push_front(stackCheckMBB);
3456
3457 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3458 unsigned LEAop, CMPop, CALLop;
3459 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3460 if (Is64Bit) {
3461 SPReg = X86::RSP;
3462 PReg = X86::RBP;
3463 LEAop = X86::LEA64r;
3464 CMPop = X86::CMP64rm;
3465 CALLop = X86::CALL64pcrel32;
3466 } else {
3467 SPReg = X86::ESP;
3468 PReg = X86::EBP;
3469 LEAop = X86::LEA32r;
3470 CMPop = X86::CMP32rm;
3471 CALLop = X86::CALLpcrel32;
3472 }
3473
3474 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3475 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3476 "HiPE prologue scratch register is live-in");
3477
3478 // Create new MBB for StackCheck:
3479 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
3480 SPReg, false, -MaxStack);
3481 // SPLimitOffset is in a fixed heap location (pointed by BP).
3482 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
3483 .addReg(ScratchReg), PReg, false, SPLimitOffset);
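// Branch to the original prologue when SP - MaxStack is still at or above
// P_NSP_LIMIT (unsigned compare); otherwise fall through into the
// stack-growth block.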
3484 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);
3485
3486 // Create new MBB for IncStack:
3487 BuildMI(incStackMBB, DL, TII.get(CALLop)).
3488 addExternalSymbol("inc_stack_0");
3489 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
3490 SPReg, false, -MaxStack);
3491 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
3492 .addReg(ScratchReg), PReg, false, SPLimitOffset);
3493 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);
3494
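// Weight the successor edges so that the stack-growth path is considered
// cold (1/100) and the edge to the original prologue is the common case
// (99/100).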
3495 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3496 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3497 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3498 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3499 }
3500#ifdef EXPENSIVE_CHECKS
3501 MF.verify();
3502#endif
3503}
3504
3505bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3506 MachineBasicBlock::iterator MBBI,
3507 const DebugLoc &DL,
3508 int Offset) const {
3509 if (Offset <= 0)
3510 return false;
3511
3512 if (Offset % SlotSize)
3513 return false;
3514
3515 int NumPops = Offset / SlotSize;
3516 // This is only worth it if we have at most 2 pops.
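// e.g. on x86-64 an 8-byte adjustment can become a single 'pop' of a dead
// register, which encodes more compactly than 'add rsp, 8'.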
3517 if (NumPops != 1 && NumPops != 2)
3518 return false;
3519
3520 // Handle only the trivial case where the adjustment directly follows
3521 // a call. This is the most common one, anyway.
3522 if (MBBI == MBB.begin())
3523 return false;
3524 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3525 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3526 return false;
3527
3528 unsigned Regs[2];
3529 unsigned FoundRegs = 0;
3530
3531 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3532 const MachineOperand &RegMask = Prev->getOperand(1);
3533
3534 auto &RegClass =
3535 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3536 // Try to find up to NumPops free registers.
3537 for (auto Candidate : RegClass) {
3538 // Poor man's liveness:
3539 // Since we're immediately after a call, any register that is clobbered
3540 // by the call and not defined by it can be considered dead.
3541 if (!RegMask.clobbersPhysReg(Candidate))
3542 continue;
3543
3544 // Don't clobber reserved registers
3545 if (MRI.isReserved(Candidate))
3546 continue;
3547
3548 bool IsDef = false;
3549 for (const MachineOperand &MO : Prev->implicit_operands()) {
3550 if (MO.isReg() && MO.isDef() &&
3551 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3552 IsDef = true;
3553 break;
3554 }
3555 }
3556
3557 if (IsDef)
3558 continue;
3559
3560 Regs[FoundRegs++] = Candidate;
3561 if (FoundRegs == (unsigned)NumPops)
3562 break;
3563 }
3564
3565 if (FoundRegs == 0)
3566 return false;
3567
3568 // If we found only one free register, but need two, reuse the same one twice.
3569 while (FoundRegs < (unsigned)NumPops)
3570 Regs[FoundRegs++] = Regs[0];
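// Popping the same dead register twice still adjusts SP by 2 * SlotSize;
// the values loaded into it are simply discarded.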
3571
3572 for (int i = 0; i < NumPops; ++i)
3573 BuildMI(MBB, MBBI, DL,
3574 TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
3575
3576 return true;
3577}
3578
3579 MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3580 MachineFunction &MF, MachineBasicBlock &MBB,
3581 MachineBasicBlock::iterator I) const {
3582 bool reserveCallFrame = hasReservedCallFrame(MF);
3583 unsigned Opcode = I->getOpcode();
3584 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3585 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3586 uint64_t Amount = TII.getFrameSize(*I);
3587 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3588 I = MBB.erase(I);
3589 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3590
3591 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3592 // typically because the function is marked noreturn (abort, throw,
3593 // assert_fail, etc).
3594 if (isDestroy && blockEndIsUnreachable(MBB, I))
3595 return I;
3596
3597 if (!reserveCallFrame) {
3598 // If the stack pointer can be changed after prologue, turn the
3599 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3600 // adjcallstackdown instruction into 'add ESP, <amt>'
3601
3602 // We need to keep the stack aligned properly. To do this, we round the
3603 // amount of space needed for the outgoing arguments up to the next
3604 // alignment boundary.
3605 Amount = alignTo(Amount, getStackAlign());
3606
3607 const Function &F = MF.getFunction();
3608 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3609 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3610
3611 // If we have any exception handlers in this function, and we adjust
3612 // the SP before calls, we may need to indicate this to the unwinder
3613 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3614 // Amount == 0, because the preceding function may have set a non-0
3615 // GNU_ARGS_SIZE.
3616 // TODO: We don't need to reset this between subsequent functions,
3617 // if it didn't change.
3618 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3619
3620 if (HasDwarfEHHandlers && !isDestroy &&
3621 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3622 BuildCFI(MBB, InsertPos, DL,
3623 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3624
3625 if (Amount == 0)
3626 return I;
3627
3628 // Factor out the amount that gets handled inside the sequence
3629 // (Pushes of argument for frame setup, callee pops for frame destroy)
3630 Amount -= InternalAmt;
3631
3632 // TODO: This is needed only if we require precise CFA.
3633 // If this is a callee-pop calling convention, emit a CFA adjust for
3634 // the amount the callee popped.
3635 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3636 BuildCFI(MBB, InsertPos, DL,
3637 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3638
3639 // Add Amount to SP to destroy a frame, or subtract to setup.
3640 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3641
3642 if (StackAdjustment) {
3643 // Merge with any previous or following adjustment instruction. Note: the
3644 // instructions merged with here do not have CFI, so their stack
3645 // adjustments do not feed into CfaAdjustment.
3646 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3647 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3648
3649 if (StackAdjustment) {
3650 if (!(F.hasMinSize() &&
3651 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3652 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3653 /*InEpilogue=*/false);
3654 }
3655 }
3656
3657 if (DwarfCFI && !hasFP(MF)) {
3658 // If we don't have FP, but need to generate unwind information,
3659 // we need to set the correct CFA offset after the stack adjustment.
3660 // How much we adjust the CFA offset depends on whether we're emitting
3661 // CFI only for EH purposes or for debugging. EH only requires the CFA
3662 // offset to be correct at each call site, while for debugging we want
3663 // it to be more precise.
3664
3665 int64_t CfaAdjustment = -StackAdjustment;
3666 // TODO: When not using precise CFA, we also need to adjust for the
3667 // InternalAmt here.
3668 if (CfaAdjustment) {
3669 BuildCFI(MBB, InsertPos, DL,
3670 MCCFIInstruction::createAdjustCfaOffset(nullptr,
3671 CfaAdjustment));
3672 }
3673 }
3674
3675 return I;
3676 }
3677
3678 if (InternalAmt) {
3679 MachineBasicBlock::iterator CI = I;
3680 MachineBasicBlock::iterator B = MBB.begin();
3681 while (CI != B && !std::prev(CI)->isCall())
3682 --CI;
3683 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3684 }
3685
3686 return I;
3687}
3688
3689 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3690 assert(MBB.getParent() && "Block is not attached to a function!");
3691 const MachineFunction &MF = *MBB.getParent();
3692 if (!MBB.isLiveIn(X86::EFLAGS))
3693 return true;
3694
3695 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3696 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3697 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3698 const X86TargetLowering &TLI = *STI.getTargetLowering();
3699 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3700 return false;
3701
3702 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3703 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3704}
3705
3706 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3707 assert(MBB.getParent() && "Block is not attached to a function!");
3708
3709 // Win64 has strict requirements in terms of epilogue and we are
3710 // not taking a chance at messing with them.
3711 // I.e., unless this block is already an exit block, we can't use
3712 // it as an epilogue.
3713 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3714 return false;
3715
3716 // Swift async context epilogue has a BTR instruction that clobbers parts of
3717 // EFLAGS.
3718 const MachineFunction &MF = *MBB.getParent();
3719 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3720 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3721 
3722 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3723 return true;
3724
3725 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3726 // clobbers the EFLAGS. Check that we do not need to preserve it,
3727 // otherwise, conservatively assume this is not
3728 // safe to insert the epilogue here.
3729 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3730}
3731
3732 bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3733 // If we may need to emit frameless compact unwind information, give
3734 // up as this is currently broken: PR25614.
3735 bool CompactUnwind =
3736 MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() !=
3737 nullptr;
3738 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3739 !CompactUnwind) &&
3740 // The lowering of segmented stack and HiPE only support entry
3741 // blocks as prologue blocks: PR26107. This limitation may be
3742 // lifted if we fix:
3743 // - adjustForSegmentedStacks
3744 // - adjustForHiPEPrologue
3745 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3746 !MF.shouldSplitStack();
3747}
3748
3749 MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3750 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3751 const DebugLoc &DL, bool RestoreSP) const {
3752 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3753 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3754 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3755 "restoring EBP/ESI on non-32-bit target");
3756
3757 MachineFunction &MF = *MBB.getParent();
3758 Register FramePtr = TRI->getFrameRegister(MF);
3759 Register BasePtr = TRI->getBaseRegister();
3760 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3761 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3762 MachineFrameInfo &MFI = MF.getFrameInfo();
3763
3764 // FIXME: Don't set FrameSetup flag in catchret case.
3765
3766 int FI = FuncInfo.EHRegNodeFrameIndex;
3767 int EHRegSize = MFI.getObjectSize(FI);
3768
3769 if (RestoreSP) {
3770 // MOV32rm -EHRegSize(%ebp), %esp
3771 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3772 X86::EBP, true, -EHRegSize)
3773 .setMIFlag(MachineInstr::FrameSetup);
3774 }
3775
3776 Register UsedReg;
3777 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3778 int EndOffset = -EHRegOffset - EHRegSize;
3779 FuncInfo.EHRegNodeEndOffset = EndOffset;
3780
3781 if (UsedReg == FramePtr) {
3782 // ADD $offset, %ebp
3783 unsigned ADDri = getADDriOpcode(false);
3784 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3785 .addReg(FramePtr)
3786 .addImm(EndOffset)
3787 .setMIFlag(MachineInstr::FrameSetup)
3788 ->getOperand(3)
3789 .setIsDead();
3790 assert(EndOffset >= 0 &&
3791 "end of registration object above normal EBP position!");
3792 } else if (UsedReg == BasePtr) {
3793 // LEA offset(%ebp), %esi
3794 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3795 FramePtr, false, EndOffset)
3796 .setMIFlag(MachineInstr::FrameSetup);
3797 // MOV32rm SavedEBPOffset(%esi), %ebp
3798 assert(X86FI->getHasSEHFramePtrSave());
3799 int Offset =
3800 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3801 .getFixed();
3802 assert(UsedReg == BasePtr);
3803 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3804 UsedReg, true, Offset)
3805 .setMIFlag(MachineInstr::FrameSetup);
3806 } else {
3807 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3808 }
3809 return MBBI;
3810}
3811
3812 int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3813 return TRI->getSlotSize();
3814}
3815
3816 Register
3817 X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3818 return StackPtr;
3819}
3820
3821 TargetFrameLowering::DwarfFrameBase
3822 X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
3823 const X86RegisterInfo *RI = STI.getRegisterInfo();
3824 Register FrameRegister = RI->getFrameRegister(MF);
3825 if (getInitialCFARegister(MF) == FrameRegister &&
3826 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
3827 DwarfFrameBase FrameBase;
3828 FrameBase.Kind = DwarfFrameBase::CFA;
3829 FrameBase.Location.Offset =
3830 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
3831 return FrameBase;
3832 }
3833
3834 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
3835}
3836
3837namespace {
3838// Struct used by orderFrameObjects to help sort the stack objects.
3839struct X86FrameSortingObject {
3840 bool IsValid = false; // true if we care about this Object.
3841 unsigned ObjectIndex = 0; // Index of Object into MFI list.
3842 unsigned ObjectSize = 0; // Size of Object in bytes.
3843 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3844 unsigned ObjectNumUses = 0; // Object static number of uses.
3845};
3846
3847// The comparison function we use for std::sort to order our local
3848// stack symbols. The current algorithm is to use an estimated
3849// "density". This takes into consideration the size and number of
3850// uses each object has in order to roughly minimize code size.
3851// So, for example, an object of size 16B that is referenced 5 times
3852// will get higher priority than 4 4B objects referenced 1 time each.
3853// It's not perfect and we may be able to squeeze a few more bytes out of
3854// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
3855// fringe end can have special consideration, given their size is less
3856// important, etc.), but the algorithmic complexity grows too much to be
3857// worth the extra gains we get. This gets us pretty close.
3858// The final order leaves us with objects with highest priority going
3859// at the end of our list.
3860struct X86FrameSortingComparator {
3861 inline bool operator()(const X86FrameSortingObject &A,
3862 const X86FrameSortingObject &B) const {
3863 uint64_t DensityAScaled, DensityBScaled;
3864
3865 // For consistency in our comparison, all invalid objects are placed
3866 // at the end. This also allows us to stop walking when we hit the
3867 // first invalid item after it's all sorted.
3868 if (!A.IsValid)
3869 return false;
3870 if (!B.IsValid)
3871 return true;
3872
3873 // The density is calculated by doing :
3874 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
3875 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
3876 // Since this approach may cause inconsistencies in
3877 // the floating point <, >, == comparisons, depending on the floating
3878 // point model with which the compiler was built, we're going
3879 // to scale both sides by multiplying with
3880 // A.ObjectSize * B.ObjectSize. This ends up factoring away
3881 // the division and, with it, the need for any floating point
3882 // arithmetic.
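// That is, A.ObjectNumUses / A.ObjectSize < B.ObjectNumUses / B.ObjectSize
// is evaluated as A.ObjectNumUses * B.ObjectSize < B.ObjectNumUses *
// A.ObjectSize, which is exact since all values are non-negative integers.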
3883 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
3884 static_cast<uint64_t>(B.ObjectSize);
3885 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
3886 static_cast<uint64_t>(A.ObjectSize);
3887
3888 // If the two densities are equal, prioritize highest alignment
3889 // objects. This allows for similar alignment objects
3890 // to be packed together (given the same density).
3891 // There's room for improvement here, also, since we can pack
3892 // similar alignment (different density) objects next to each
3893 // other to save padding. This will also require further
3894 // complexity/iterations, and the overall gain isn't worth it,
3895 // in general. Something to keep in mind, though.
3896 if (DensityAScaled == DensityBScaled)
3897 return A.ObjectAlignment < B.ObjectAlignment;
3898
3899 return DensityAScaled < DensityBScaled;
3900 }
3901};
3902} // namespace
3903
3904// Order the symbols in the local stack.
3905// We want to place the local stack objects in some sort of sensible order.
3906// The heuristic we use is to try and pack them according to static number
3907// of uses and size of object in order to minimize code size.
3908 void X86FrameLowering::orderFrameObjects(
3909 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
3910 const MachineFrameInfo &MFI = MF.getFrameInfo();
3911
3912 // Don't waste time if there's nothing to do.
3913 if (ObjectsToAllocate.empty())
3914 return;
3915
3916 // Create an array of all MFI objects. We won't need all of these
3917 // objects, but we're going to create a full array of them to make
3918 // it easier to index into when we're counting "uses" down below.
3919 // We want to be able to easily/cheaply access an object by simply
3920 // indexing into it, instead of having to search for it every time.
3921 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
3922
3923 // Walk the objects we care about and mark them as such in our working
3924 // struct.
3925 for (auto &Obj : ObjectsToAllocate) {
3926 SortingObjects[Obj].IsValid = true;
3927 SortingObjects[Obj].ObjectIndex = Obj;
3928 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
3929 // Set the size.
3930 int ObjectSize = MFI.getObjectSize(Obj);
3931 if (ObjectSize == 0)
3932 // Variable size. Just use 4.
3933 SortingObjects[Obj].ObjectSize = 4;
3934 else
3935 SortingObjects[Obj].ObjectSize = ObjectSize;
3936 }
3937
3938 // Count the number of uses for each object.
3939 for (auto &MBB : MF) {
3940 for (auto &MI : MBB) {
3941 if (MI.isDebugInstr())
3942 continue;
3943 for (const MachineOperand &MO : MI.operands()) {
3944 // Check to see if it's a local stack symbol.
3945 if (!MO.isFI())
3946 continue;
3947 int Index = MO.getIndex();
3948 // Check to see if it falls within our range, and is tagged
3949 // to require ordering.
3950 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
3951 SortingObjects[Index].IsValid)
3952 SortingObjects[Index].ObjectNumUses++;
3953 }
3954 }
3955 }
3956
3957 // Sort the objects using the X86FrameSortingComparator (see its comment for
3958 // info).
3959 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
3960
3961 // Now modify the original list to represent the final order that
3962 // we want. The order will depend on whether we're going to access them
3963 // from the stack pointer or the frame pointer. For SP, the list should
3964 // end up with the END containing objects that we want with smaller offsets.
3965 // For FP, it should be flipped.
3966 int i = 0;
3967 for (auto &Obj : SortingObjects) {
3968 // All invalid items are sorted at the end, so it's safe to stop.
3969 if (!Obj.IsValid)
3970 break;
3971 ObjectsToAllocate[i++] = Obj.ObjectIndex;
3972 }
3973
3974 // Flip it if we're accessing off of the FP.
3975 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
3976 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
3977}
3978
3979
3980 unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
3981 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
3982 unsigned Offset = 16;
3983 // RBP is immediately pushed.
3984 Offset += SlotSize;
3985 // All callee-saved registers are then pushed.
3986 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
3987 // Every funclet allocates enough stack space for the largest outgoing call.
3988 Offset += getWinEHFuncletFrameSize(MF);
3989 return Offset;
3990}
3991
3992 void X86FrameLowering::processFunctionBeforeFrameFinalized(
3993 MachineFunction &MF, RegScavenger *RS) const {
3994 // Mark the function as not having WinCFI. We will set it back to true in
3995 // emitPrologue if it gets called and emits CFI.
3996 MF.setHasWinCFI(false);
3997
3998 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
3999 // aligned. The format doesn't support misaligned stack adjustments.
4000 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4001 MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
4002 
4003 // If this function isn't doing Win64-style C++ EH, we don't need to do
4004 // anything.
4005 if (STI.is64Bit() && MF.hasEHFunclets() &&
4006 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4007 EHPersonality::MSVC_CXX) {
4008 adjustFrameForMsvcCxxEh(MF);
4009 }
4010}
4011
4012void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4013 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4014 // relative to RSP after the prologue. Find the offset of the last fixed
4015 // object, so that we can allocate a slot immediately following it. If there
4016 // were no fixed objects, use offset -SlotSize, which is immediately after the
4017 // return address. Fixed objects have negative frame indices.
4018 MachineFrameInfo &MFI = MF.getFrameInfo();
4019 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4020 int64_t MinFixedObjOffset = -SlotSize;
4021 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4022 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4023
4024 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4025 for (WinEHHandlerType &H : TBME.HandlerArray) {
4026 int FrameIndex = H.CatchObj.FrameIndex;
4027 if (FrameIndex != INT_MAX) {
4028 // Ensure alignment.
4029 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4030 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4031 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4032 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4033 }
4034 }
4035 }
4036
4037 // Ensure alignment.
4038 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4039 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4040 int UnwindHelpFI =
4041 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4042 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4043
4044 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4045 // other frame setup instructions.
4046 MachineBasicBlock &MBB = MF.front();
4047 auto MBBI = MBB.begin();
4048 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4049 ++MBBI;
4050
4051 DebugLoc DL = MBB.findDebugLoc(MBBI);
4052 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4053 UnwindHelpFI)
4054 .addImm(-2);
4055}
4056
4057 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
4058 MachineFunction &MF, RegScavenger *RS) const {
4059 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4060
4061 if (STI.is32Bit() && MF.hasEHFunclets())
4062 restoreWinEHStackPointersInParent(MF);
4063 // We have emitted prolog and epilog. Don't need stack pointer saving
4064 // instruction any more.
4065 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4066 MI->eraseFromParent();
4067 X86FI->setStackPtrSaveMI(nullptr);
4068 }
4069}
4070
4071 void X86FrameLowering::restoreWinEHStackPointersInParent(
4072 MachineFunction &MF) const {
4073 // 32-bit functions have to restore stack pointers when control is transferred
4074 // back to the parent function. These blocks are identified as eh pads that
4075 // are not funclet entries.
4076 bool IsSEH = isAsynchronousEHPersonality(
4077 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4078 for (MachineBasicBlock &MBB : MF) {
4079 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4080 if (NeedsRestore)
4081 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4082 /*RestoreSP=*/IsSEH);
4083 }
4084}