//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}
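
// For illustration: getMOVriOpcode(/*Use64BitReg=*/true, 0x7fffffff) yields
// X86::MOV32ri64 (zero-extending 32-bit move), a negative value such as -8
// yields X86::MOV64ri32 (sign-extended 32-bit immediate), and 1LL << 40
// falls back to the full X86::MOV64ri encoding.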

// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching PUSH
// and POP instructions, without going through memory or through the training
// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
// memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}
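
// Balanced use, illustratively: a spill emitted with getPUSHOpcode()
// (PUSHP64r when PPX is available) should be matched by a reload emitted
// with getPOPOpcode() (POPP64r) on every path to a return, and
// getPUSH2Opcode()/getPOP2Opcode() likewise pair two callee-saved GPRs per
// instruction -- this is what NumFunctionUsingPush2Pop2 above counts.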

static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    unsigned Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case, if the eflags is live-in of the region
/// composed by the terminators or live-out of that region, without
/// being defined by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by a previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
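
// Illustrative (hypothetical) terminator sequence for the live-in case:
//   CMP32ri killed %eax, 0, implicit-def $eflags   ; not a terminator
//   JCC_1 %bb.1, 4, implicit $eflags               ; terminator reads EFLAGS
// The first terminator uses EFLAGS without defining it, so this returns true
// and an SP adjustment inserted before it must not use a flag-writing ADD/SUB.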

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);

    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}
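
// Illustrative expansions of the above (64-bit, no inline probing): an
// 8-byte decrement becomes a single "pushq %rax" when RAX is dead, while a
// multi-gigabyte decrement materializes the amount first, roughly
// "movabsq $N, %rax; subq %rax, %rsp", instead of a long chain of SUBs.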

MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}
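
// Illustrative merge: with "SUB64ri32 $rsp, 16" immediately before MBBI,
// mergeSPUpdates(MBB, MBBI, /*doMergeWithPrevious=*/true) erases that SUB
// (plus its lone CFA-offset CFI, if any) and returns -16, so the caller can
// fold those 16 bytes into the stack adjustment it is about to emit.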

void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        // |   retaddr   |
        // |     ebp     |
        // |             |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits are
  // left between the unaligned rsp and current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small it fits within a page, there's nothing to do.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a Page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust Dwarf CFA offset here, the last position of the stack has
  // been defined
}
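
// Illustrative unrolled expansion for Offset = 8208 (two pages + 16 bytes),
// AlignOffset = 0, and a 4096-byte probe size:
//   subq $4096, %rsp
//   movq $0, (%rsp)
//   subq $4096, %rsp
//   movq $0, (%rsp)
//   subq $16, %rsp        ; tail is smaller than a page, no probe needed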

void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);

  // save loop bound
  {
    const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
        .addReg(FinalStackProbed)
        .addImm(BoundOffset)
        .setMIFlag(MachineInstr::FrameSetup);

    // while in the loop, use loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d, we must use r11 instead
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // allocate a page
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // handle tail
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // after the loop, switch back to stack pointer for CFI
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp, we must use rsp instead
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update Live In information
  fullyRecomputeLiveIns({tailMBB, testMBB});
}
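
// Illustrative shape of the synthesized loop (64-bit, 4096-byte probes):
//   movq %rsp, %r11
//   subq $bound, %r11     ; bound = alignDown(Offset, 4096)
// .LtestMBB:
//   subq $4096, %rsp      ; allocate one page
//   movq $0, (%rsp)       ; touch it
//   cmpq %r11, %rsp
//   jne  .LtestMBB
//   subq $tail, %rsp      ; tail = Offset % 4096, left unprobed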

void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if both
    // need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  ContinueMBB->addLiveIn(SizeReg);
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
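
// Worked examples: SPAdjust = 72   -> min(72, 128)  = 72,  72 & -16  = 64;
//                  SPAdjust = 1000 -> min(1000,128) = 128, 128 & -16 = 128.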

// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}
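
// For example, on a target with a 16-byte ABI stack alignment, a
// "stackrealign" function whose locals only need 4-byte alignment still gets
// MaxAlign raised to 16 if it makes calls, keeping call sites aligned.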

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in worst case) less than StackProbeSize bytes
  // are not probed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump to the footer if StackPtr < FinalStackProbed
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // setup loop body
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // cmp with stack pointer bound
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump back while FinalStackProbed < StackPtr
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}

bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}
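
// Illustrative effect: a leaf function needing, say, 24 bytes of locals may
// address them at negative offsets from %rsp (e.g. -24(%rsp)) with no SUB in
// the prologue, since the 128 bytes below %rsp are guaranteed not to be
// clobbered asynchronously.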

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
        .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/
1527
1529 MachineBasicBlock &MBB) const {
1530 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1531 "MF used frame lowering for wrong subtarget");
1533 MachineFrameInfo &MFI = MF.getFrameInfo();
1534 const Function &Fn = MF.getFunction();
1535 MachineModuleInfo &MMI = MF.getMMI();
1537 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1538 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1539 bool IsFunclet = MBB.isEHFuncletEntry();
1541 if (Fn.hasPersonalityFn())
1542 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1543 bool FnHasClrFunclet =
1544 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1545 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1546 bool HasFP = hasFP(MF);
1547 bool IsWin64Prologue = isWin64Prologue(MF);
1548 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1549 // FIXME: Emit FPO data for EH funclets.
1550 bool NeedsWinFPO =
1551 !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
1552 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1553 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1555 const Register MachineFramePtr =
1557 : FramePtr;
1558 Register BasePtr = TRI->getBaseRegister();
1559 bool HasWinCFI = false;
1560
1561 // Debug location must be unknown since the first debug location is used
1562 // to determine the end of the prologue.
1563 DebugLoc DL;
1564 Register ArgBaseReg;
1565
1566 // Emit extra prolog for argument stack slot reference.
1567 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1568 // MI is lea instruction that created in X86ArgumentStackSlotPass.
1569 // Creat extra prolog for stack realignment.
1570 ArgBaseReg = MI->getOperand(0).getReg();
1571 // leal 4(%esp), %basereg
1572 // .cfi_def_cfa %basereg, 0
1573 // andl $-128, %esp
1574 // pushl -4(%basereg)
1575 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1576 ArgBaseReg)
1578 .addImm(1)
1579 .addUse(X86::NoRegister)
1581 .addUse(X86::NoRegister)
1583 if (NeedsDwarfCFI) {
1584 // .cfi_def_cfa %basereg, 0
1585 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1586 BuildCFI(MBB, MBBI, DL,
1587 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1589 }
1590 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1591 int64_t Offset = -(int64_t)SlotSize;
1592 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1593 .addReg(ArgBaseReg)
1594 .addImm(1)
1595 .addReg(X86::NoRegister)
1596 .addImm(Offset)
1597 .addReg(X86::NoRegister)
1599 }
1600
1601 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1602 // tail call.
1603 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1604 if (TailCallArgReserveSize && IsWin64Prologue)
1605 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1606
1607 const bool EmitStackProbeCall =
1609 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1610
1611 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1615 // The special symbol below is absolute and has a *value* suitable to be
1616 // combined with the frame pointer directly.
1617 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1618 .addUse(MachineFramePtr)
1619 .addUse(X86::RIP)
1620 .addImm(1)
1621 .addUse(X86::NoRegister)
1622 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1624 .addUse(X86::NoRegister);
1625 break;
1626 }
1627 [[fallthrough]];
1628
1630 assert(
1631 !IsWin64Prologue &&
1632 "win64 prologue does not set the bit 60 in the saved frame pointer");
1633 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1634 .addUse(MachineFramePtr)
1635 .addImm(60)
1637 break;
1638
1640 break;
1641 }
1642 }
1643
1644 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1645 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1646 // stack alignment.
1648 Fn.arg_size() == 2) {
1649 StackSize += 8;
1650 MFI.setStackSize(StackSize);
1651
1652 // Update the stack pointer by pushing a register. This is the instruction
1653 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1654 // Hard-coding the update to a push avoids emitting a second
1655 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1656 // probing isn't needed anyways for an 8-byte update.
1657 // Pushing a register leaves us in a similar situation to a regular
1658 // function call where we know that the address at (rsp-8) is writeable.
1659 // That way we avoid any off-by-ones with stack probing for additional
1660 // stack pointer updates later on.
1661 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1662 .addReg(X86::RAX, RegState::Undef)
1664 }
1665
1666 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1667 // function, and use up to 128 bytes of stack space, don't have a frame
1668 // pointer, calls, or dynamic alloca then we do not need to adjust the
1669 // stack pointer (we fit in the Red Zone). We also check that we don't
1670 // push and pop from the stack.
1671 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1672 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1673 !MFI.adjustsStack() && // No calls.
1674 !EmitStackProbeCall && // No stack probes.
1675 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1676 !MF.shouldSplitStack()) { // Regular stack
1677 uint64_t MinSize =
1679 if (HasFP)
1680 MinSize += SlotSize;
1681 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1682 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1683 MFI.setStackSize(StackSize);
1684 }
1685
1686 // Insert stack pointer adjustment for later moving of return addr. Only
1687 // applies to tail call optimized functions where the callee argument stack
1688 // size is bigger than the callers.
1689 if (TailCallArgReserveSize != 0) {
1690 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1691 /*InEpilogue=*/false)
1693 }
1694
1695 // Mapping for machine moves:
1696 //
1697 // DST: VirtualFP AND
1698 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1699 // ELSE => DW_CFA_def_cfa
1700 //
1701 // SRC: VirtualFP AND
1702 // DST: Register => DW_CFA_def_cfa_register
1703 //
1704 // ELSE
1705 // OFFSET < 0 => DW_CFA_offset_extended_sf
1706 // REG < 64 => DW_CFA_offset + Reg
1707 // ELSE => DW_CFA_offset_extended
1708
1709 uint64_t NumBytes = 0;
1710 int stackGrowth = -SlotSize;
1711
1712 // Find the funclet establisher parameter
1713 Register Establisher = X86::NoRegister;
1714 if (IsClrFunclet)
1715 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1716 else if (IsFunclet)
1717 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1718
1719 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1720 // Immediately spill establisher into the home slot.
1721 // The runtime cares about this.
1722 // MOV64mr %rdx, 16(%rsp)
1723 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1724 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1725 .addReg(Establisher)
1727 MBB.addLiveIn(Establisher);
1728 }
1729
1730 if (HasFP) {
1731 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1732
1733 // Calculate required stack adjustment.
1734 uint64_t FrameSize = StackSize - SlotSize;
1735 NumBytes =
1736 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1737
1738 // Callee-saved registers are pushed on stack before the stack is realigned.
1739 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1740 NumBytes = alignTo(NumBytes, MaxAlign);
1741
1742 // Save EBP/RBP into the appropriate stack slot.
1743 BuildMI(MBB, MBBI, DL,
1745 .addReg(MachineFramePtr, RegState::Kill)
1747
1748 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1749 // Mark the place where EBP/RBP was saved.
1750 // Define the current CFA rule to use the provided offset.
1751 assert(StackSize);
1752 BuildCFI(MBB, MBBI, DL,
1754 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1756
1757 // Change the rule for the FramePtr to be an "offset" rule.
1758 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1759 BuildCFI(MBB, MBBI, DL,
1760 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1761 2 * stackGrowth -
1762 (int)TailCallArgReserveSize),
1764 }
1765
1766 if (NeedsWinCFI) {
1767 HasWinCFI = true;
1768 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1771 }
1772
1773 if (!IsFunclet) {
1774 if (X86FI->hasSwiftAsyncContext()) {
1775 assert(!IsWin64Prologue &&
1776 "win64 prologue does not store async context right below rbp");
1777 const auto &Attrs = MF.getFunction().getAttributes();
1778
1779 // Before we update the live frame pointer we have to ensure there's a
1780 // valid (or null) asynchronous context in its slot just before FP in
1781 // the frame record, so store it now.
1782 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1783 // We have an initial context in r14, store it just before the frame
1784 // pointer.
1785 MBB.addLiveIn(X86::R14);
1786 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1787 .addReg(X86::R14)
1788 .setMIFlag(MachineInstr::FrameSetup);
1789 } else {
1790 // No initial context, store null so that there's no pointer that
1791 // could be misused.
1792 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1793 .addImm(0)
1794 .setMIFlag(MachineInstr::FrameSetup);
1795 }
1796
1797 if (NeedsWinCFI) {
1798 HasWinCFI = true;
1799 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1800 .addImm(X86::R14)
1801 .setMIFlag(MachineInstr::FrameSetup);
1802 }
1803
1804 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1805 .addUse(X86::RSP)
1806 .addImm(1)
1807 .addUse(X86::NoRegister)
1808 .addImm(8)
1809 .addUse(X86::NoRegister)
1810 .setMIFlag(MachineInstr::FrameSetup);
1811 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1812 .addUse(X86::RSP)
1813 .addImm(8)
1814 .setMIFlag(MachineInstr::FrameSetup);
1815 }
1816
1817 if (!IsWin64Prologue && !IsFunclet) {
1818 // Update EBP with the new base value.
1819 if (!X86FI->hasSwiftAsyncContext())
1820 BuildMI(MBB, MBBI, DL,
1821 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1822 FramePtr)
1823 .addReg(StackPtr)
1824 .setMIFlag(MachineInstr::FrameSetup);
1825
1826 if (NeedsDwarfCFI) {
1827 if (ArgBaseReg.isValid()) {
1828 SmallString<64> CfaExpr;
1829 CfaExpr.push_back(dwarf::DW_CFA_expression);
1830 uint8_t buffer[16];
1831 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1832 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1833 CfaExpr.push_back(2);
1834 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1835 CfaExpr.push_back(0);
1836 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1837 BuildCFI(MBB, MBBI, DL,
1838 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1839 MachineInstr::FrameSetup);
1840 } else {
1841 // Mark effective beginning of when frame pointer becomes valid.
1842 // Define the current CFA to use the EBP/RBP register.
1843 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1844 BuildCFI(
1845 MBB, MBBI, DL,
1846 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1847 MachineInstr::FrameSetup);
1848 }
1849 }
1850
1851 if (NeedsWinFPO) {
1852 // .cv_fpo_setframe $FramePtr
1853 HasWinCFI = true;
1854 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1855 .addImm(FramePtr)
1856 .addImm(0)
1857 .setMIFlag(MachineInstr::FrameSetup);
1858 }
1859 }
1860 }
1861 } else {
1862 assert(!IsFunclet && "funclets without FPs not yet implemented");
1863 NumBytes =
1864 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1865 }
1866
1867 // Update the offset adjustment, which is mainly used by codeview to translate
1868 // from ESP to VFRAME relative local variable offsets.
1869 if (!IsFunclet) {
1870 if (HasFP && TRI->hasStackRealignment(MF))
1871 MFI.setOffsetAdjustment(-NumBytes);
1872 else
1873 MFI.setOffsetAdjustment(-StackSize);
1874 }
1875
1876 // For EH funclets, only allocate enough space for outgoing calls. Save the
1877 // NumBytes value that we would've used for the parent frame.
1878 unsigned ParentFrameNumBytes = NumBytes;
1879 if (IsFunclet)
1880 NumBytes = getWinEHFuncletFrameSize(MF);
1881
1882 // Skip the callee-saved push instructions.
1883 bool PushedRegs = false;
1884 int StackOffset = 2 * stackGrowth;
1885 MachineBasicBlock::const_iterator LastCSPush = MBBI;
1886 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1887 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1888 return false;
1889 unsigned Opc = MBBI->getOpcode();
1890 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1891 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1892 };
1893
1894 while (IsCSPush(MBBI)) {
1895 PushedRegs = true;
1896 Register Reg = MBBI->getOperand(0).getReg();
1897 LastCSPush = MBBI;
1898 ++MBBI;
1899 unsigned Opc = LastCSPush->getOpcode();
1900
1901 if (!HasFP && NeedsDwarfCFI) {
1902 // Mark callee-saved push instruction.
1903 // Define the current CFA rule to use the provided offset.
1904 assert(StackSize);
1905 // Compared to push, push2 introduces more stack offset (one more
1906 // register).
1907 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1908 StackOffset += stackGrowth;
1909 BuildCFI(MBB, MBBI, DL,
1910 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1911 MachineInstr::FrameSetup);
1912 StackOffset += stackGrowth;
1913 }
1914
1915 if (NeedsWinCFI) {
1916 HasWinCFI = true;
1917 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1918 .addImm(Reg)
1919 .setMIFlag(MachineInstr::FrameSetup);
1920 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1921 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1922 .addImm(LastCSPush->getOperand(1).getReg())
1923 .setMIFlag(MachineInstr::FrameSetup);
1924 }
1925 }
1926
1927 // Realign stack after we pushed callee-saved registers (so that we'll be
1928 // able to calculate their offsets from the frame pointer).
1929 // Don't do this for Win64, it needs to realign the stack after the prologue.
1930 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1931 !ArgBaseReg.isValid()) {
1932 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1933 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1934
1935 if (NeedsWinCFI) {
1936 HasWinCFI = true;
1937 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1938 .addImm(MaxAlign)
1939 .setMIFlag(MachineInstr::FrameSetup);
1940 }
1941 }
1942
1943 // If there is a SUB32ri of ESP immediately before this instruction, merge
1944 // the two. This can be the case when tail call elimination is enabled and
1945 // the callee has more arguments than the caller.
1946 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1947
1948 // Adjust stack pointer: ESP -= numbytes.
1949
1950 // Windows and cygwin/mingw require a prologue helper routine when allocating
1951 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1952 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1953 // stack and adjust the stack pointer in one go. The 64-bit version of
1954 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1955 // responsible for adjusting the stack pointer. Touching the stack at 4K
1956 // increments is necessary to ensure that the guard pages used by the OS
1957 // virtual memory manager are allocated in correct sequence.
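// Rough sketch of the 64-bit Windows case for an 8 KiB frame (illustrative
// only; the precise sequence comes from emitStackProbe below):
//   movl $0x2000, %eax
//   callq __chkstk        ; probes the guard pages, leaves RSP alone
//   subq %rax, %rsp       ; the prologue performs the actual adjustment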
1958 uint64_t AlignedNumBytes = NumBytes;
1959 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1960 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1961 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1962 assert(!X86FI->getUsesRedZone() &&
1963 "The Red Zone is not accounted for in stack probes");
1964
1965 // Check whether EAX is livein for this block.
1966 bool isEAXAlive = isEAXLiveIn(MBB);
1967
1968 if (isEAXAlive) {
1969 if (Is64Bit) {
1970 // Save RAX
1971 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1972 .addReg(X86::RAX, RegState::Kill)
1973 .setMIFlag(MachineInstr::FrameSetup);
1974 } else {
1975 // Save EAX
1976 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1977 .addReg(X86::EAX, RegState::Kill)
1978 .setMIFlag(MachineInstr::FrameSetup);
1979 }
1980 }
1981
1982 if (Is64Bit) {
1983 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1984 // Function prologue is responsible for adjusting the stack pointer.
1985 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1986 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1987 .addImm(Alloc)
1988 .setMIFlag(MachineInstr::FrameSetup);
1989 } else {
1990 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1991 // We'll also use 4 already allocated bytes for EAX.
1992 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1993 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1994 .setMIFlag(MachineInstr::FrameSetup);
1995 }
1996
1997 // Call __chkstk, __chkstk_ms, or __alloca.
1998 emitStackProbe(MF, MBB, MBBI, DL, true);
1999
2000 if (isEAXAlive) {
2001 // Restore RAX/EAX
2002 MachineInstr *MI;
2003 if (Is64Bit)
2004 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2005 StackPtr, false, NumBytes - 8);
2006 else
2007 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2008 StackPtr, false, NumBytes - 4);
2009 MI->setFlag(MachineInstr::FrameSetup);
2010 MBB.insert(MBBI, MI);
2011 }
2012 } else if (NumBytes) {
2013 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2014 }
2015
2016 if (NeedsWinCFI && NumBytes) {
2017 HasWinCFI = true;
2018 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2019 .addImm(NumBytes)
2020 .setMIFlag(MachineInstr::FrameSetup);
2021 }
2022
2023 int SEHFrameOffset = 0;
2024 unsigned SPOrEstablisher;
2025 if (IsFunclet) {
2026 if (IsClrFunclet) {
2027 // The establisher parameter passed to a CLR funclet is actually a pointer
2028 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2029 // to find the root function establisher frame by loading the PSPSym from
2030 // the intermediate frame.
2031 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2032 MachinePointerInfo NoInfo;
2033 MBB.addLiveIn(Establisher);
2034 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2035 Establisher, false, PSPSlotOffset)
2036 .addMemOperand(MF.getMachineMemOperand(
2037 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2038 ;
2039 // Save the root establisher back into the current funclet's (mostly
2040 // empty) frame, in case a sub-funclet or the GC needs it.
2041 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2042 false, PSPSlotOffset)
2043 .addReg(Establisher)
2044 .addMemOperand(MF.getMachineMemOperand(
2045 NoInfo,
2046 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2047 SlotSize, Align(SlotSize)));
2048 }
2049 SPOrEstablisher = Establisher;
2050 } else {
2051 SPOrEstablisher = StackPtr;
2052 }
2053
2054 if (IsWin64Prologue && HasFP) {
2055 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2056 // this calculation on the incoming establisher, which holds the value of
2057 // RSP from the parent frame at the end of the prologue.
2058 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2059 if (SEHFrameOffset)
2060 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2061 SPOrEstablisher, false, SEHFrameOffset);
2062 else
2063 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2064 .addReg(SPOrEstablisher);
2065
2066 // If this is not a funclet, emit the CFI describing our frame pointer.
2067 if (NeedsWinCFI && !IsFunclet) {
2068 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2069 HasWinCFI = true;
2070 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2071 .addImm(FramePtr)
2072 .addImm(SEHFrameOffset)
2073 .setMIFlag(MachineInstr::FrameSetup);
2074 if (isAsynchronousEHPersonality(Personality))
2075 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2076 }
2077 } else if (IsFunclet && STI.is32Bit()) {
2078 // Reset EBP / ESI to something good for funclets.
2079 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2080 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2081 // into the registration node so that the runtime will restore it for us.
2082 if (!MBB.isCleanupFuncletEntry()) {
2083 assert(Personality == EHPersonality::MSVC_CXX);
2084 Register FrameReg;
2085 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2086 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2087 // ESP is the first field, so no extra displacement is needed.
2088 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2089 false, EHRegOffset)
2090 .addReg(X86::ESP);
2091 }
2092 }
2093
2094 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2095 const MachineInstr &FrameInstr = *MBBI;
2096 ++MBBI;
2097
2098 if (NeedsWinCFI) {
2099 int FI;
2100 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2101 if (X86::FR64RegClass.contains(Reg)) {
2102 int Offset;
2103 Register IgnoredFrameReg;
2104 if (IsWin64Prologue && IsFunclet)
2105 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2106 else
2107 Offset =
2108 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2109 SEHFrameOffset;
2110
2111 HasWinCFI = true;
2112 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2113 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2114 .addImm(Reg)
2115 .addImm(Offset)
2116 .setMIFlag(MachineInstr::FrameSetup);
2117 }
2118 }
2119 }
2120 }
2121
2122 if (NeedsWinCFI && HasWinCFI)
2123 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2124 .setMIFlag(MachineInstr::FrameSetup);
2125
2126 if (FnHasClrFunclet && !IsFunclet) {
2127 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2128 // immediately after the prolog) into the PSPSlot so that funclets
2129 // and the GC can recover it.
2130 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2131 auto PSPInfo = MachinePointerInfo::getFixedStack(
2132 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2133 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2134 PSPSlotOffset)
2135 .addReg(StackPtr)
2136 .addMemOperand(MF.getMachineMemOperand(
2137 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2138 SlotSize, Align(SlotSize)));
2139 }
2140
2141 // Realign stack after we spilled callee-saved registers (so that we'll be
2142 // able to calculate their offsets from the frame pointer).
2143 // Win64 requires aligning the stack after the prologue.
2144 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2145 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2146 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2147 }
2148
2149 // We already dealt with stack realignment and funclets above.
2150 if (IsFunclet && STI.is32Bit())
2151 return;
2152
2153 // If we need a base pointer, set it up here. It's whatever the value
2154 // of the stack pointer is at this point. Any variable size objects
2155 // will be allocated after this, so we can still use the base pointer
2156 // to reference locals.
2157 if (TRI->hasBasePointer(MF)) {
2158 // Update the base pointer with the current stack pointer.
2159 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2160 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2161 .addReg(SPOrEstablisher)
2162 .setMIFlag(MachineInstr::FrameSetup);
2163 if (X86FI->getRestoreBasePointer()) {
2164 // Stash value of base pointer. Saving RSP instead of EBP shortens
2165 // dependence chain. Used by SjLj EH.
2166 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2167 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2168 X86FI->getRestoreBasePointerOffset())
2169 .addReg(SPOrEstablisher)
2170 .setMIFlag(MachineInstr::FrameSetup);
2171 }
2172
2173 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2174 // Stash the value of the frame pointer relative to the base pointer for
2175 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2176 // it recovers the frame pointer from the base pointer rather than the
2177 // other way around.
2178 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2179 Register UsedReg;
2180 int Offset =
2181 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2182 .getFixed();
2183 assert(UsedReg == BasePtr);
2184 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2185 .addReg(FramePtr)
2186 .setMIFlag(MachineInstr::FrameSetup);
2187 }
2188 }
2189 if (ArgBaseReg.isValid()) {
2190 // Save argument base pointer.
2191 auto *MI = X86FI->getStackPtrSaveMI();
2192 int FI = MI->getOperand(1).getIndex();
2193 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2194 // movl %basereg, offset(%ebp)
2195 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2196 .addReg(ArgBaseReg)
2197 .setMIFlag(MachineInstr::FrameSetup);
2198 }
2199
2200 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2201 // Mark end of stack pointer adjustment.
2202 if (!HasFP && NumBytes) {
2203 // Define the current CFA rule to use the provided offset.
2204 assert(StackSize);
2205 BuildCFI(
2206 MBB, MBBI, DL,
2207 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2208 MachineInstr::FrameSetup);
2209 }
2210
2211 // Emit DWARF info specifying the offsets of the callee-saved registers.
2212 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2213 }
2214
2215 // X86 Interrupt handling function cannot assume anything about the direction
2216 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2217 // in each prologue of interrupt handler function.
2218 //
2219 // Create "cld" instruction only in these cases:
2220 // 1. The interrupt handling function uses any of the "rep" instructions.
2221 // 2. Interrupt handling function calls another function.
2222 // 3. If there are any inline asm blocks, as we do not know what they do
2223 //
2224 // TODO: We should also emit cld if we detect the use of std, but as of now,
2225 // the compiler does not even emit that instruction or even define it, so in
2226 // practice, this would only happen with inline asm, which we cover anyway.
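// Net effect (editorial note): for an x86_interrupt function that calls out,
// uses rep-prefixed instructions, or contains inline asm, the prologue gains
// a single cld so the handler body sees a cleared direction flag.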
2227 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
2228 bool NeedsCLD = false;
2229
2230 for (const MachineBasicBlock &B : MF) {
2231 for (const MachineInstr &MI : B) {
2232 if (MI.isCall()) {
2233 NeedsCLD = true;
2234 break;
2235 }
2236
2237 if (isOpcodeRep(MI.getOpcode())) {
2238 NeedsCLD = true;
2239 break;
2240 }
2241
2242 if (MI.isInlineAsm()) {
2243 // TODO: Parse asm for rep instructions or call sites?
2244 // For now, let's play it safe and emit a cld instruction
2245 // just in case.
2246 NeedsCLD = true;
2247 break;
2248 }
2249 }
2250 }
2251
2252 if (NeedsCLD) {
2253 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2254 .setMIFlag(MachineInstr::FrameSetup);
2255 }
2256 }
2257
2258 // At this point we know if the function has WinCFI or not.
2259 MF.setHasWinCFI(HasWinCFI);
2260}
2261
2262 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2263 const MachineFunction &MF) const {
2264 // We can't use LEA instructions for adjusting the stack pointer if we don't
2265 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2266 // to deallocate the stack.
2267 // This means that we can use LEA for SP in two situations:
2268 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2269 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2270 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2271}
2272
2273 static bool isFuncletReturnInstr(MachineInstr &MI) {
2274 switch (MI.getOpcode()) {
2275 case X86::CATCHRET:
2276 case X86::CLEANUPRET:
2277 return true;
2278 default:
2279 return false;
2280 }
2281 llvm_unreachable("impossible");
2282}
2283
2284// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2285// stack. It holds a pointer to the bottom of the root function frame. The
2286// establisher frame pointer passed to a nested funclet may point to the
2287// (mostly empty) frame of its parent funclet, but it will need to find
2288// the frame of the root function to access locals. To facilitate this,
2289// every funclet copies the pointer to the bottom of the root function
2290// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2291// same offset for the PSPSym in the root function frame that's used in the
2292// funclets' frames allows each funclet to dynamically accept any ancestor
2293// frame as its establisher argument (the runtime doesn't guarantee the
2294// immediate parent for some reason lost to history), and also allows the GC,
2295// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2296// frame with only a single offset reported for the entire method.
2297unsigned
2298X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2299 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2300 Register SPReg;
2301 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2302 /*IgnoreSPUpdates*/ true)
2303 .getFixed();
2304 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2305 return static_cast<unsigned>(Offset);
2306}
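// Editorial sketch of the resulting layout (offsets illustrative only):
//   root frame:    ... | PSPSym | outgoing call args | <- RSP after prologue
//   funclet frame:     | PSPSym | outgoing call args | <- RSP after prologue
// Keeping the PSPSym at the same SP-relative offset in both frames is what
// lets this single function answer for either kind of frame.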
2307
2308unsigned
2309X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2310 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2311 // This is the size of the pushed CSRs.
2312 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2313 // This is the size of callee saved XMMs.
2314 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2315 unsigned XMMSize =
2316 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2317 // This is the amount of stack a funclet needs to allocate.
2318 unsigned UsedSize;
2319 EHPersonality Personality =
2320 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2321 if (Personality == EHPersonality::CoreCLR) {
2322 // CLR funclets need to hold enough space to include the PSPSym, at the
2323 // same offset from the stack pointer (immediately after the prolog) as it
2324 // resides at in the main function.
2325 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2326 } else {
2327 // Other funclets just need enough stack for outgoing call arguments.
2328 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2329 }
2330 // RBP is not included in the callee saved register block. After pushing RBP,
2331 // everything is 16 byte aligned. Everything we allocate before an outgoing
2332 // call must also be 16 byte aligned.
2333 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2334 // Subtract out the size of the callee saved registers. This is how much stack
2335 // each funclet will allocate.
2336 return FrameSizeMinusRBP + XMMSize - CSSize;
2337}
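// Worked example (editorial): with CSSize = 16, two saved XMMs (XMMSize = 32),
// UsedSize = 40 and 16-byte stack alignment, FrameSizeMinusRBP =
// alignTo(16 + 40, 16) = 64, so each funclet allocates 64 + 32 - 16 = 80
// bytes.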
2338
2339static bool isTailCallOpcode(unsigned Opc) {
2340 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2341 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2342 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2343}
2344
2345 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2346 MachineBasicBlock &MBB) const {
2347 const MachineFrameInfo &MFI = MF.getFrameInfo();
2348 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2349 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2350 MachineBasicBlock::iterator MBBI = Terminator;
2351 DebugLoc DL;
2352 if (MBBI != MBB.end())
2353 DL = MBBI->getDebugLoc();
2354 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2355 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2356 Register FramePtr = TRI->getFrameRegister(MF);
2357 Register MachineFramePtr =
2358 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2359
2360 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2361 bool NeedsWin64CFI =
2362 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2363 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2364
2365 // Get the number of bytes to allocate from the FrameInfo.
2366 uint64_t StackSize = MFI.getStackSize();
2367 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2368 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2369 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2370 bool HasFP = hasFP(MF);
2371 uint64_t NumBytes = 0;
2372
2373 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2374 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2375 MF.needsFrameMoves();
2376
2377 Register ArgBaseReg;
2378 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2379 unsigned Opc = X86::LEA32r;
2380 Register StackReg = X86::ESP;
2381 ArgBaseReg = MI->getOperand(0).getReg();
2382 if (STI.is64Bit()) {
2383 Opc = X86::LEA64r;
2384 StackReg = X86::RSP;
2385 }
2386 // leal -4(%basereg), %esp
2387 // .cfi_def_cfa %esp, 4
2388 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2389 .addUse(ArgBaseReg)
2390 .addImm(1)
2391 .addUse(X86::NoRegister)
2392 .addImm(-(int64_t)SlotSize)
2393 .addUse(X86::NoRegister)
2394 .setMIFlag(MachineInstr::FrameDestroy);
2395 if (NeedsDwarfCFI) {
2396 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2397 BuildCFI(MBB, MBBI, DL,
2398 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2399 MachineInstr::FrameDestroy);
2400 --MBBI;
2401 }
2402 --MBBI;
2403 }
2404
2405 if (IsFunclet) {
2406 assert(HasFP && "EH funclets without FP not yet implemented");
2407 NumBytes = getWinEHFuncletFrameSize(MF);
2408 } else if (HasFP) {
2409 // Calculate required stack adjustment.
2410 uint64_t FrameSize = StackSize - SlotSize;
2411 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2412
2413 // Callee-saved registers were pushed on stack before the stack was
2414 // realigned.
2415 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2416 NumBytes = alignTo(FrameSize, MaxAlign);
2417 } else {
2418 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2419 }
2420 uint64_t SEHStackAllocAmt = NumBytes;
2421
2422 // AfterPop is the position to insert .cfi_restore.
2423 MachineBasicBlock::iterator AfterPop = MBBI;
2424 if (HasFP) {
2425 if (X86FI->hasSwiftAsyncContext()) {
2426 // Discard the context.
2427 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2428 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2429 }
2430 // Pop EBP.
2431 BuildMI(MBB, MBBI, DL,
2432 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2433 MachineFramePtr)
2434 .setMIFlag(MachineInstr::FrameDestroy);
2435
2436 // We need to reset FP to its untagged state on return. Bit 60 is currently
2437 // used to show the presence of an extended frame.
2438 if (X86FI->hasSwiftAsyncContext()) {
2439 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2440 .addUse(MachineFramePtr)
2441 .addImm(60)
2442 .setMIFlag(MachineInstr::FrameDestroy);
2443 }
2444
2445 if (NeedsDwarfCFI) {
2446 if (!ArgBaseReg.isValid()) {
2447 unsigned DwarfStackPtr =
2448 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2449 BuildCFI(MBB, MBBI, DL,
2450 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2451 MachineInstr::FrameDestroy);
2452 }
2453 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2454 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2455 BuildCFI(MBB, AfterPop, DL,
2456 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2457 MachineInstr::FrameDestroy);
2458 --MBBI;
2459 --AfterPop;
2460 }
2461 --MBBI;
2462 }
2463 }
2464
2465 MachineBasicBlock::iterator FirstCSPop = MBBI;
2466 // Skip the callee-saved pop instructions.
2467 while (MBBI != MBB.begin()) {
2468 MachineBasicBlock::iterator PI = std::prev(MBBI);
2469 unsigned Opc = PI->getOpcode();
2470
2471 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2472 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2473 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2474 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2475 Opc != X86::POP2P && Opc != X86::LEA64r))
2476 break;
2477 FirstCSPop = PI;
2478 }
2479
2480 --MBBI;
2481 }
2482 if (ArgBaseReg.isValid()) {
2483 // Restore argument base pointer.
2484 auto *MI = X86FI->getStackPtrSaveMI();
2485 int FI = MI->getOperand(1).getIndex();
2486 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2487 // movl offset(%ebp), %basereg
2488 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2489 .setMIFlag(MachineInstr::FrameDestroy);
2490 }
2491 MBBI = FirstCSPop;
2492
2493 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2494 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2495
2496 if (MBBI != MBB.end())
2497 DL = MBBI->getDebugLoc();
2498 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2499 // instruction, merge the two instructions.
2500 if (NumBytes || MFI.hasVarSizedObjects())
2501 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2502
2503 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2504 // slot before popping them off! The same applies when the stack was
2505 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2506 // will not do realignment or dynamic stack allocation.
2507 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2508 !IsFunclet) {
2509 if (TRI->hasStackRealignment(MF))
2510 MBBI = FirstCSPop;
2511 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2512 uint64_t LEAAmount =
2513 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2514
2515 if (X86FI->hasSwiftAsyncContext())
2516 LEAAmount -= 16;
2517
2518 // There are only two legal forms of epilogue:
2519 // - add SEHAllocationSize, %rsp
2520 // - lea SEHAllocationSize(%FramePtr), %rsp
2521 //
2522 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2523 // However, we may use this sequence if we have a frame pointer because the
2524 // effects of the prologue can safely be undone.
2525 if (LEAAmount != 0) {
2526 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2527 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2528 false, LEAAmount);
2529 --MBBI;
2530 } else {
2531 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2532 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2533 --MBBI;
2534 }
2535 } else if (NumBytes) {
2536 // Adjust stack pointer back: ESP += numbytes.
2537 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2538 if (!HasFP && NeedsDwarfCFI) {
2539 // Define the current CFA rule to use the provided offset.
2540 BuildCFI(MBB, MBBI, DL,
2541 MCCFIInstruction::cfiDefCfaOffset(
2542 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2543 MachineInstr::FrameDestroy);
2544 }
2545 --MBBI;
2546 }
2547
2548 // Windows unwinder will not invoke function's exception handler if IP is
2549 // either in prologue or in epilogue. This behavior causes a problem when a
2550 // call immediately precedes an epilogue, because the return address points
2551 // into the epilogue. To cope with that, we insert an epilogue marker here,
2552 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2553 // final emitted code.
2554 if (NeedsWin64CFI && MF.hasWinCFI())
2555 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2556
2557 if (!HasFP && NeedsDwarfCFI) {
2558 MBBI = FirstCSPop;
2559 int64_t Offset = -CSSize - SlotSize;
2560 // Mark callee-saved pop instruction.
2561 // Define the current CFA rule to use the provided offset.
2562 while (MBBI != MBB.end()) {
2563 MachineBasicBlock::iterator PI = MBBI;
2564 unsigned Opc = PI->getOpcode();
2565 ++MBBI;
2566 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2567 Opc == X86::POP2 || Opc == X86::POP2P) {
2568 Offset += SlotSize;
2569 // Compared to pop, pop2 introduces more stack offset (one more
2570 // register).
2571 if (Opc == X86::POP2 || Opc == X86::POP2P)
2572 Offset += SlotSize;
2573 BuildCFI(MBB, MBBI, DL,
2574 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2575 MachineInstr::FrameDestroy);
2576 }
2577 }
2578 }
2579
2580 // Emit DWARF info specifying the restores of the callee-saved registers.
2581 // For epilogue with return inside or being other block without successor,
2582 // no need to generate .cfi_restore for callee-saved registers.
2583 if (NeedsDwarfCFI && !MBB.succ_empty())
2584 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2585
2586 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2587 // Add the return addr area delta back since we are not tail calling.
2588 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2589 assert(Offset >= 0 && "TCDelta should never be positive");
2590 if (Offset) {
2591 // Check for possible merge with preceding ADD instruction.
2592 Offset += mergeSPUpdates(MBB, Terminator, true);
2593 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2594 }
2595 }
2596
2597 // Emit tilerelease for AMX kernel.
2598 if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
2599 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2600}
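// As a hedged overall picture, a typical x86-64 frame-pointer epilogue built
// by the logic above looks like:
//   addq $24, %rsp   ; NumBytes, possibly merged with neighbouring updates
//   popq %rbx        ; callee-saved pops (FrameDestroy)
//   popq %rbp
//   retq
// The exact shape varies with realignment, funclets and tail-call reserves.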
2601
2602 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2603 int FI,
2604 Register &FrameReg) const {
2605 const MachineFrameInfo &MFI = MF.getFrameInfo();
2606
2607 bool IsFixed = MFI.isFixedObjectIndex(FI);
2608 // We can't calculate offset from frame pointer if the stack is realigned,
2609 // so enforce usage of stack/base pointer. The base pointer is used when we
2610 // have dynamic allocas in addition to dynamic realignment.
2611 if (TRI->hasBasePointer(MF))
2612 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2613 else if (TRI->hasStackRealignment(MF))
2614 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2615 else
2616 FrameReg = TRI->getFrameRegister(MF);
2617
2618 // Offset will hold the offset from the stack pointer at function entry to the
2619 // object.
2620 // We need to factor in additional offsets applied during the prologue to the
2621 // frame, base, and stack pointer depending on which is used.
2622 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2623 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2624 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2625 uint64_t StackSize = MFI.getStackSize();
2626 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2627 int64_t FPDelta = 0;
2628
2629 // In an x86 interrupt, remove the offset we added to account for the return
2630 // address from any stack object allocated in the caller's frame. Interrupts
2631 // do not have a standard return address. Fixed objects in the current frame,
2632 // such as SSE register spills, should not get this treatment.
2633 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2634 Offset >= 0) {
2635 Offset += getOffsetOfLocalArea();
2636 }
2637
2638 if (IsWin64Prologue) {
2639 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2640
2641 // Calculate required stack adjustment.
2642 uint64_t FrameSize = StackSize - SlotSize;
2643 // If required, include space for extra hidden slot for stashing base
2644 // pointer.
2645 if (X86FI->getRestoreBasePointer())
2646 FrameSize += SlotSize;
2647 uint64_t NumBytes = FrameSize - CSSize;
2648
2649 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2650 if (FI && FI == X86FI->getFAIndex())
2651 return StackOffset::getFixed(-SEHFrameOffset);
2652
2653 // FPDelta is the offset from the "traditional" FP location of the old base
2654 // pointer followed by return address and the location required by the
2655 // restricted Win64 prologue.
2656 // Add FPDelta to all offsets below that go through the frame pointer.
2657 FPDelta = FrameSize - SEHFrameOffset;
2658 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2659 "FPDelta isn't aligned per the Win64 ABI!");
2660 }
2661
2662 if (FrameReg == TRI->getFramePtr()) {
2663 // Skip saved EBP/RBP
2664 Offset += SlotSize;
2665
2666 // Account for restricted Windows prologue.
2667 Offset += FPDelta;
2668
2669 // Skip the RETADDR move area
2670 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2671 if (TailCallReturnAddrDelta < 0)
2672 Offset -= TailCallReturnAddrDelta;
2673
2674 return StackOffset::getFixed(Offset);
2675 }
2676
2677 // FrameReg is either the stack pointer or a base pointer. But the base is
2678 // located at the end of the statically known StackSize so the distinction
2679 // doesn't really matter.
2680 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2681 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2682 return StackOffset::getFixed(Offset + StackSize);
2683}
2684
2685 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2686 Register &FrameReg) const {
2687 const MachineFrameInfo &MFI = MF.getFrameInfo();
2688 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2689 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2690 const auto it = WinEHXMMSlotInfo.find(FI);
2691
2692 if (it == WinEHXMMSlotInfo.end())
2693 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2694
2695 FrameReg = TRI->getStackRegister();
2696 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2697 it->second;
2698}
2699
2700 StackOffset
2701 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2702 Register &FrameReg,
2703 int Adjustment) const {
2704 const MachineFrameInfo &MFI = MF.getFrameInfo();
2705 FrameReg = TRI->getStackRegister();
2706 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2707 getOffsetOfLocalArea() + Adjustment);
2708}
2709
2710 StackOffset
2711 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2712 int FI, Register &FrameReg,
2713 bool IgnoreSPUpdates) const {
2714
2715 const MachineFrameInfo &MFI = MF.getFrameInfo();
2716 // Does not include any dynamic realign.
2717 const uint64_t StackSize = MFI.getStackSize();
2718 // LLVM arranges the stack as follows:
2719 // ...
2720 // ARG2
2721 // ARG1
2722 // RETADDR
2723 // PUSH RBP <-- RBP points here
2724 // PUSH CSRs
2725 // ~~~~~~~ <-- possible stack realignment (non-win64)
2726 // ...
2727 // STACK OBJECTS
2728 // ... <-- RSP after prologue points here
2729 // ~~~~~~~ <-- possible stack realignment (win64)
2730 //
2731 // if (hasVarSizedObjects()):
2732 // ... <-- "base pointer" (ESI/RBX) points here
2733 // DYNAMIC ALLOCAS
2734 // ... <-- RSP points here
2735 //
2736 // Case 1: In the simple case of no stack realignment and no dynamic
2737 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2738 // with fixed offsets from RSP.
2739 //
2740 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2741 // stack objects are addressed with RBP and regular stack objects with RSP.
2742 //
2743 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2744 // to address stack arguments for outgoing calls and nothing else. The "base
2745 // pointer" points to local variables, and RBP points to fixed objects.
2746 //
2747 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2748 // answer we give is relative to the SP after the prologue, and not the
2749 // SP in the middle of the function.
2750
2751 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2752 !STI.isTargetWin64())
2753 return getFrameIndexReference(MF, FI, FrameReg);
2754
2755 // If !hasReservedCallFrame the function might have SP adjustment in the
2756 // body. So, even though the offset is statically known, it depends on where
2757 // we are in the function.
2758 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2759 return getFrameIndexReference(MF, FI, FrameReg);
2760
2761 // We don't handle tail calls, and shouldn't be seeing them either.
2763 "we don't handle this case!");
2764
2765 // This is how the math works out:
2766 //
2767 // %rsp grows (i.e. gets lower) left to right. Each box below is
2768 // one word (eight bytes). Obj0 is the stack slot we're trying to
2769 // get to.
2770 //
2771 // ----------------------------------
2772 // | BP | Obj0 | Obj1 | ... | ObjN |
2773 // ----------------------------------
2774 // ^ ^ ^ ^
2775 // A B C E
2776 //
2777 // A is the incoming stack pointer.
2778 // (B - A) is the local area offset (-8 for x86-64) [1]
2779 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2780 //
2781 // |(E - B)| is the StackSize (absolute value, positive). For a
2782 // stack that grows down, this works out to be (B - E). [3]
2783 //
2784 // E is also the value of %rsp after stack has been set up, and we
2785 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2786 // (C - E) == (C - A) - (B - A) + (B - E)
2787 // { Using [1], [2] and [3] above }
2788 // == getObjectOffset - LocalAreaOffset + StackSize
2789
2790 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2791}
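// Editorial numeric instance of the math above: with getObjectOffset = -24,
// LocalAreaOffset = -8 and StackSize = 40, the answer is
// -24 - (-8) + 40 = 24, i.e. Obj0 sits 24 bytes above the post-prologue %rsp.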
2792
2793 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2794 MachineFunction &MF, const TargetRegisterInfo *TRI,
2795 std::vector<CalleeSavedInfo> &CSI) const {
2796 MachineFrameInfo &MFI = MF.getFrameInfo();
2797 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2798 
2799 unsigned CalleeSavedFrameSize = 0;
2800 unsigned XMMCalleeSavedFrameSize = 0;
2801 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2802 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2803
2804 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2805
2806 if (TailCallReturnAddrDelta < 0) {
2807 // create RETURNADDR area
2808 // arg
2809 // arg
2810 // RETADDR
2811 // { ...
2812 // RETADDR area
2813 // ...
2814 // }
2815 // [EBP]
2816 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2817 TailCallReturnAddrDelta - SlotSize, true);
2818 }
2819
2820 // Spill the BasePtr if it's used.
2821 if (this->TRI->hasBasePointer(MF)) {
2822 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2823 if (MF.hasEHFunclets()) {
2824 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2825 X86FI->setHasSEHFramePtrSave(true);
2826 X86FI->setSEHFramePtrSaveIndex(FI);
2827 }
2828 }
2829
2830 if (hasFP(MF)) {
2831 // emitPrologue always spills frame register the first thing.
2832 SpillSlotOffset -= SlotSize;
2833 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2834
2835 // The async context lives directly before the frame pointer, and we
2836 // allocate a second slot to preserve stack alignment.
2837 if (X86FI->hasSwiftAsyncContext()) {
2838 SpillSlotOffset -= SlotSize;
2839 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2840 SpillSlotOffset -= SlotSize;
2841 }
2842
2843 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2844 // the frame register, we can delete it from CSI list and not have to worry
2845 // about avoiding it later.
2846 Register FPReg = TRI->getFrameRegister(MF);
2847 for (unsigned i = 0; i < CSI.size(); ++i) {
2848 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2849 CSI.erase(CSI.begin() + i);
2850 break;
2851 }
2852 }
2853 }
2854
2855 // Strategy:
2856 // 1. Use push2 when
2857 // a) the number of CSRs > 1 if no padding is needed
2858 // b) the number of CSRs > 2 if padding is needed
2859 // 2. When the number of CSR push is odd
2860 // a. Start to use push2 from the 1st push if stack is 16B aligned.
2861 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2862 // 3. When the number of CSR push is even, start to use push2 from the 1st
2863 // push and make the stack 16B aligned before the push (see the example below).
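// Worked example (editorial, assuming three GPR CSRs and a spill area that
// starts 16B aligned): the count is odd and the first slot is aligned, so
// rule 2a applies; the first two registers pushed become one push2 pair, the
// third is pushed alone, and no padding slot is created.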
2864 unsigned NumRegsForPush2 = 0;
2865 if (STI.hasPush2Pop2()) {
2866 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2867 return X86::GR64RegClass.contains(I.getReg());
2868 });
2869 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2870 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2871 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2872 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2873 if (X86FI->padForPush2Pop2()) {
2874 SpillSlotOffset -= SlotSize;
2875 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2876 }
2877 }
2878
2879 // Assign slots for GPRs. It increases frame size.
2880 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2881 Register Reg = I.getReg();
2882
2883 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2884 continue;
2885
2886 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2887 // or when the candidate list currently holds an odd number of registers.
2888 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2889 (SpillSlotOffset % 16 == 0 ||
2890 X86FI->getNumCandidatesForPush2Pop2() % 2))
2891 X86FI->addCandidateForPush2Pop2(Reg);
2892
2893 SpillSlotOffset -= SlotSize;
2894 CalleeSavedFrameSize += SlotSize;
2895
2896 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2897 I.setFrameIdx(SlotIndex);
2898 }
2899
2900 // Adjust the offset of spill slot as we know the accurate callee saved frame
2901 // size.
2902 if (X86FI->getRestoreBasePointer()) {
2903 SpillSlotOffset -= SlotSize;
2904 CalleeSavedFrameSize += SlotSize;
2905
2906 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2907 // TODO: saving the slot index is better?
2908 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2909 }
2910 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2911 "Expect even candidates for push2/pop2");
2912 if (X86FI->getNumCandidatesForPush2Pop2())
2913 ++NumFunctionUsingPush2Pop2;
2914 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2915 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2916
2917 // Assign slots for XMMs.
2918 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2919 Register Reg = I.getReg();
2920 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2921 continue;
2922
2923 // If this is k-register make sure we lookup via the largest legal type.
2924 MVT VT = MVT::Other;
2925 if (X86::VK16RegClass.contains(Reg))
2926 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2927
2928 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2929 unsigned Size = TRI->getSpillSize(*RC);
2930 Align Alignment = TRI->getSpillAlign(*RC);
2931 // ensure alignment
2932 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2933 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2934
2935 // spill into slot
2936 SpillSlotOffset -= Size;
2937 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2938 I.setFrameIdx(SlotIndex);
2939 MFI.ensureMaxAlignment(Alignment);
2940
2941 // Save the start offset and size of XMM in stack frame for funclets.
2942 if (X86::VR128RegClass.contains(Reg)) {
2943 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2944 XMMCalleeSavedFrameSize += Size;
2945 }
2946 }
2947
2948 return true;
2949}
2950
2951 bool X86FrameLowering::spillCalleeSavedRegisters(
2952 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2953 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2954 DebugLoc DL = MBB.findDebugLoc(MI);
2955 
2956 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2957 // for us, and there are no XMM CSRs on Win32.
2958 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2959 return true;
2960
2961 // Push GPRs. It increases frame size.
2962 const MachineFunction &MF = *MBB.getParent();
2963 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2964 if (X86FI->padForPush2Pop2())
2965 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
2966
2967 // Update LiveIn of the basic block and decide whether we can add a kill flag
2968 // to the use.
2969 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
2970 const MachineRegisterInfo &MRI = MF.getRegInfo();
2971 // Do not set a kill flag on values that are also marked as live-in. This
2972 // happens with the @llvm.returnaddress intrinsic and with arguments
2973 // passed in callee saved registers.
2974 // Omitting the kill flags is conservatively correct even if the live-in
2975 // is not used after all.
2976 if (MRI.isLiveIn(Reg))
2977 return false;
2978 MBB.addLiveIn(Reg);
2979 // Check if any subregister is live-in
2980 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
2981 if (MRI.isLiveIn(*AReg))
2982 return false;
2983 return true;
2984 };
2985 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
2986 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
2987 };
2988
2989 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
2990 Register Reg = RI->getReg();
2991 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2992 continue;
2993
2994 if (X86FI->isCandidateForPush2Pop2(Reg)) {
2995 Register Reg2 = (++RI)->getReg();
2996 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
2997 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
2998 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
2999 .setMIFlag(MachineInstr::FrameSetup);
3000 } else {
3001 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3002 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3003 .setMIFlag(MachineInstr::FrameSetup);
3004 }
3005 }
3006
3007 if (X86FI->getRestoreBasePointer()) {
3008 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3009 Register BaseReg = this->TRI->getBaseRegister();
3010 BuildMI(MBB, MI, DL, TII.get(Opc))
3011 .addReg(BaseReg, getKillRegState(true))
3012 .setMIFlag(MachineInstr::FrameSetup);
3013 }
3014
3015 // Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
3016 // so they are stored to their stack-frame slots instead.
3017 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3018 Register Reg = I.getReg();
3019 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3020 continue;
3021
3022 // If this is k-register make sure we lookup via the largest legal type.
3023 MVT VT = MVT::Other;
3024 if (X86::VK16RegClass.contains(Reg))
3025 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3026
3027 // Add the callee-saved register as live-in. It's killed at the spill.
3028 MBB.addLiveIn(Reg);
3029 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3030
3031 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3032 Register());
3033 --MI;
3034 MI->setFlag(MachineInstr::FrameSetup);
3035 ++MI;
3036 }
3037
3038 return true;
3039}
3040
3041void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3042 MachineBasicBlock::iterator MBBI,
3043 MachineInstr *CatchRet) const {
3044 // SEH shouldn't use catchret.
3047 "SEH should not use CATCHRET");
3048 const DebugLoc &DL = CatchRet->getDebugLoc();
3049 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3050
3051 // Fill EAX/RAX with the address of the target block.
3052 if (STI.is64Bit()) {
3053 // LEA64r CatchRetTarget(%rip), %rax
3054 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3055 .addReg(X86::RIP)
3056 .addImm(0)
3057 .addReg(0)
3058 .addMBB(CatchRetTarget)
3059 .addReg(0);
3060 } else {
3061 // MOV32ri $CatchRetTarget, %eax
3062 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3063 .addMBB(CatchRetTarget);
3064 }
3065
3066 // Record that we've taken the address of CatchRetTarget and no longer just
3067 // reference it in a terminator.
3068 CatchRetTarget->setMachineBlockAddressTaken();
3069}
3070
3071 bool X86FrameLowering::restoreCalleeSavedRegisters(
3072 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3073 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3074 if (CSI.empty())
3075 return false;
3076
3077 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3078 // Don't restore CSRs in 32-bit EH funclets. Matches
3079 // spillCalleeSavedRegisters.
3080 if (STI.is32Bit())
3081 return true;
3082 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3083 // funclets. emitEpilogue transforms these to normal jumps.
3084 if (MI->getOpcode() == X86::CATCHRET) {
3085 const Function &F = MBB.getParent()->getFunction();
3086 bool IsSEH = isAsynchronousEHPersonality(
3087 classifyEHPersonality(F.getPersonalityFn()));
3088 if (IsSEH)
3089 return true;
3090 }
3091 }
3092
3093 DebugLoc DL = MBB.findDebugLoc(MI);
3094 
3095 // Reload XMMs from stack frame.
3096 for (const CalleeSavedInfo &I : CSI) {
3097 Register Reg = I.getReg();
3098 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3099 continue;
3100
3101 // If this is k-register make sure we lookup via the largest legal type.
3102 MVT VT = MVT::Other;
3103 if (X86::VK16RegClass.contains(Reg))
3104 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3105
3106 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3107 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3108 Register());
3109 }
3110
3111 // Clear the stack slot for spill base pointer register.
3112 MachineFunction &MF = *MBB.getParent();
3113 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3114 if (X86FI->getRestoreBasePointer()) {
3115 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3116 Register BaseReg = this->TRI->getBaseRegister();
3117 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3118 .setMIFlag(MachineInstr::FrameDestroy);
3119 }
3120
3121 // POP GPRs.
3122 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3123 Register Reg = I->getReg();
3124 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3125 continue;
3126
3127 if (X86FI->isCandidateForPush2Pop2(Reg))
3128 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3129 .addReg((++I)->getReg(), RegState::Define)
3130 .setMIFlag(MachineInstr::FrameDestroy);
3131 else
3132 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3133 .setMIFlag(MachineInstr::FrameDestroy);
3134 }
3135 if (X86FI->padForPush2Pop2())
3136 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3137
3138 return true;
3139}
3140
3141 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3142 BitVector &SavedRegs,
3143 RegScavenger *RS) const {
3144 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3145 
3146 // Spill the BasePtr if it's used.
3147 if (TRI->hasBasePointer(MF)) {
3148 Register BasePtr = TRI->getBaseRegister();
3149 if (STI.isTarget64BitILP32())
3150 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3151 SavedRegs.set(BasePtr);
3152 }
3153}
3154
3155static bool HasNestArgument(const MachineFunction *MF) {
3156 const Function &F = MF->getFunction();
3157 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3158 I++) {
3159 if (I->hasNestAttr() && !I->use_empty())
3160 return true;
3161 }
3162 return false;
3163}
3164
3165/// GetScratchRegister - Get a temp register for performing work in the
3166/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3167/// and the properties of the function either one or two registers will be
3168/// needed. Set primary to true for the first register, false for the second.
3169static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3170 const MachineFunction &MF, bool Primary) {
3171 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3172 
3173 // Erlang stuff.
3174 if (CallingConvention == CallingConv::HiPE) {
3175 if (Is64Bit)
3176 return Primary ? X86::R14 : X86::R13;
3177 else
3178 return Primary ? X86::EBX : X86::EDI;
3179 }
3180
3181 if (Is64Bit) {
3182 if (IsLP64)
3183 return Primary ? X86::R11 : X86::R12;
3184 else
3185 return Primary ? X86::R11D : X86::R12D;
3186 }
3187
3188 bool IsNested = HasNestArgument(&MF);
3189
3190 if (CallingConvention == CallingConv::X86_FastCall ||
3191 CallingConvention == CallingConv::Fast ||
3192 CallingConvention == CallingConv::Tail) {
3193 if (IsNested)
3194 report_fatal_error("Segmented stacks does not support fastcall with "
3195 "nested function.");
3196 return Primary ? X86::EAX : X86::ECX;
3197 }
3198 if (IsNested)
3199 return Primary ? X86::EDX : X86::EAX;
3200 return Primary ? X86::ECX : X86::EAX;
3201}
3202
3203// The stack limit in the TCB is set to this many bytes above the actual stack
3204// limit.
3205 static const uint64_t kSplitStackAvailable = 256;
3206 
3207 void X86FrameLowering::adjustForSegmentedStacks(
3208 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3209 MachineFrameInfo &MFI = MF.getFrameInfo();
3210 uint64_t StackSize;
3211 unsigned TlsReg, TlsOffset;
3212 DebugLoc DL;
3213
3214 // To support shrink-wrapping we would need to insert the new blocks
3215 // at the right place and update the branches to PrologueMBB.
3216 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3217
3218 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3219 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3220 "Scratch register is live-in");
3221
3222 if (MF.getFunction().isVarArg())
3223 report_fatal_error("Segmented stacks do not support vararg functions.");
3224 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3227 report_fatal_error("Segmented stacks not supported on this platform.");
3228
3229 // Eventually StackSize will be calculated by a link-time pass, which will
3230 // also decide whether checking code needs to be injected into this particular
3231 // prologue.
3232 StackSize = MFI.getStackSize();
3233
3234 if (!MFI.needsSplitStackProlog())
3235 return;
3236
3237 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3238 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3239 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3240 bool IsNested = false;
3241
3242 // We need to know if the function has a nest argument only in 64 bit mode.
3243 if (Is64Bit)
3244 IsNested = HasNestArgument(&MF);
3245
3246 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3247 // allocMBB needs to be the last (terminating) instruction.
3248
3249 for (const auto &LI : PrologueMBB.liveins()) {
3250 allocMBB->addLiveIn(LI);
3251 checkMBB->addLiveIn(LI);
3252 }
3253
3254 if (IsNested)
3255 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3256
3257 MF.push_front(allocMBB);
3258 MF.push_front(checkMBB);
3259
3260 // When the frame size is less than 256 we just compare the stack
3261 // boundary directly to the value of the stack pointer, per gcc.
3262 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3263
3264 // Read the limit of the current stacklet from the stack_guard location.
3265 if (Is64Bit) {
3266 if (STI.isTargetLinux()) {
3267 TlsReg = X86::FS;
3268 TlsOffset = IsLP64 ? 0x70 : 0x40;
3269 } else if (STI.isTargetDarwin()) {
3270 TlsReg = X86::GS;
3271 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3272 } else if (STI.isTargetWin64()) {
3273 TlsReg = X86::GS;
3274 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3275 } else if (STI.isTargetFreeBSD()) {
3276 TlsReg = X86::FS;
3277 TlsOffset = 0x18;
3278 } else if (STI.isTargetDragonFly()) {
3279 TlsReg = X86::FS;
3280 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3281 } else {
3282 report_fatal_error("Segmented stacks not supported on this platform.");
3283 }
3284
3285 if (CompareStackPointer)
3286 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3287 else
3288 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3289 ScratchReg)
3290 .addReg(X86::RSP)
3291 .addImm(1)
3292 .addReg(0)
3293 .addImm(-StackSize)
3294 .addReg(0);
3295
3296 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3297 .addReg(ScratchReg)
3298 .addReg(0)
3299 .addImm(1)
3300 .addReg(0)
3301 .addImm(TlsOffset)
3302 .addReg(TlsReg);
3303 } else {
3304 if (STI.isTargetLinux()) {
3305 TlsReg = X86::GS;
3306 TlsOffset = 0x30;
3307 } else if (STI.isTargetDarwin()) {
3308 TlsReg = X86::GS;
3309 TlsOffset = 0x48 + 90 * 4;
3310 } else if (STI.isTargetWin32()) {
3311 TlsReg = X86::FS;
3312 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3313 } else if (STI.isTargetDragonFly()) {
3314 TlsReg = X86::FS;
3315 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3316 } else if (STI.isTargetFreeBSD()) {
3317 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3318 } else {
3319 report_fatal_error("Segmented stacks not supported on this platform.");
3320 }
3321
3322 if (CompareStackPointer)
3323 ScratchReg = X86::ESP;
3324 else
3325 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3326 .addReg(X86::ESP)
3327 .addImm(1)
3328 .addReg(0)
3329 .addImm(-StackSize)
3330 .addReg(0);
3331
3332 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3333 STI.isTargetDragonFly()) {
3334 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3335 .addReg(ScratchReg)
3336 .addReg(0)
3337 .addImm(0)
3338 .addReg(0)
3339 .addImm(TlsOffset)
3340 .addReg(TlsReg);
3341 } else if (STI.isTargetDarwin()) {
3342
3343 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3344 unsigned ScratchReg2;
3345 bool SaveScratch2;
3346 if (CompareStackPointer) {
3347 // The primary scratch register is available for holding the TLS offset.
3348 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3349 SaveScratch2 = false;
3350 } else {
3351 // Need to use a second register to hold the TLS offset
3352 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3353
3354 // Unfortunately, with fastcc the second scratch register may hold an
3355 // argument.
3356 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3357 }
3358
3359 // If Scratch2 is live-in then it needs to be saved.
3360 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3361 "Scratch register is live-in and not saved");
3362
3363 if (SaveScratch2)
3364 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3365 .addReg(ScratchReg2, RegState::Kill);
3366
3367 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3368 .addImm(TlsOffset);
3369 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3370 .addReg(ScratchReg)
3371 .addReg(ScratchReg2)
3372 .addImm(1)
3373 .addReg(0)
3374 .addImm(0)
3375 .addReg(TlsReg);
3376
3377 if (SaveScratch2)
3378 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3379 }
3380 }
3381
3382 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3383 // It jumps to normal execution of the function body.
3384 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3385 .addMBB(&PrologueMBB)
3386 .addImm(X86::COND_A);
3387
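// A hedged sketch of the emitted check on x86-64 Linux (registers and TLS
// offsets differ per platform, as selected above):
//   leaq -StackSize(%rsp), %r11
//   cmpq %fs:0x70, %r11
//   ja   body               ; enough room on this stacklet
//   ...                     ; otherwise fall through into allocMBB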
3388 // On 32 bit we first push the argument size and then the frame size. On 64
3389 // bit, we pass the stack frame size in r10 and the argument size in r11.
3390 if (Is64Bit) {
3391 // Functions with nested arguments use R10, so it needs to be saved across
3392 // the call to _morestack
3393
3394 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3395 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3396 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3397 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3398
3399 if (IsNested)
3400 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3401
3402 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3403 .addImm(StackSize);
3404 BuildMI(allocMBB, DL,
3405 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3406 Reg11)
3407 .addImm(X86FI->getArgumentStackSize());
3408 } else {
3409 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3410 .addImm(X86FI->getArgumentStackSize());
3411 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3412 }
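// For illustration (hypothetical sizes: StackSize = 4096, argument area = 16),
// the 64-bit path above amounts to "movq $4096, %r10; movq $16, %r11", while
// the 32-bit path pushes the same two values: "pushl $16; pushl $4096".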
3413
3414 // __morestack is in libgcc
3415 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3416 // Under the large code model, we cannot assume that __morestack lives
3417 // within 2^31 bytes of the call site, so we cannot use pc-relative
3418 // addressing. We cannot perform the call via a temporary register,
3419 // as the rax register may be used to store the static chain, and all
3420 // other suitable registers may be either callee-save or used for
3421 // parameter passing. We cannot use the stack at this point either
3422 // because __morestack manipulates the stack directly.
3423 //
3424 // To avoid these issues, perform an indirect call via a read-only memory
3425 // location containing the address.
3426 //
3427 // This solution is not perfect, as it assumes that the .rodata section
3428 // is laid out within 2^31 bytes of each function body, but this seems
3429 // to be sufficient for JIT.
3430 // FIXME: Add retpoline support and remove the error here.
3431 if (STI.useIndirectThunkCalls())
3432 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3433 "code model and thunks not yet implemented.");
3434 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3435 .addReg(X86::RIP)
3436 .addImm(0)
3437 .addReg(0)
3438 .addExternalSymbol("__morestack_addr")
3439 .addReg(0);
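// The CALL64m built above amounts to an indirect call through a RIP-relative
// slot, roughly "callq *__morestack_addr(%rip)", where __morestack_addr is a
// pointer-sized read-only location holding the address of __morestack.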
3440 } else {
3441 if (Is64Bit)
3442 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3443 .addExternalSymbol("__morestack");
3444 else
3445 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3446 .addExternalSymbol("__morestack");
3447 }
3448
3449 if (IsNested)
3450 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3451 else
3452 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3453
3454 allocMBB->addSuccessor(&PrologueMBB);
3455
3456 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3457 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3458
3459#ifdef EXPENSIVE_CHECKS
3460 MF.verify();
3461#endif
3462}
3463
3464/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3465/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3466/// to fields it needs, through a named metadata node "hipe.literals" containing
3467/// name-value pairs.
3468static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3469 const StringRef LiteralName) {
3470 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3471 MDNode *Node = HiPELiteralsMD->getOperand(i);
3472 if (Node->getNumOperands() != 2)
3473 continue;
3474 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3475 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3476 if (!NodeName || !NodeVal)
3477 continue;
3478 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3479 if (ValConst && NodeName->getString() == LiteralName) {
3480 return ValConst->getZExtValue();
3481 }
3482 }
3483
3484 report_fatal_error("HiPE literal " + LiteralName +
3485 " required but not provided");
3486}
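// For illustration, the named metadata consumed here might look like the
// following in IR (literal values hypothetical):
//   !hipe.literals = !{!0, !1, !2}
//   !0 = !{!"P_NSP_LIMIT", i32 96}
//   !1 = !{!"X86_LEAF_WORDS", i32 24}
//   !2 = !{!"AMD64_LEAF_WORDS", i32 24}
// With that module, getHiPELiteral(MD, "P_NSP_LIMIT") returns 96.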
3487
3488// Return true if there are no non-ehpad successors to MBB and there are no
3489 // non-meta instructions between MBBI and MBB.end().
3490 static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3491 MachineBasicBlock::const_iterator MBBI) {
3492 return llvm::all_of(
3493 MBB.successors(),
3494 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3495 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3496 return MI.isMetaInstruction();
3497 });
3498}
3499
3500/// Erlang programs may need a special prologue to handle the stack size they
3501/// might need at runtime. That is because Erlang/OTP does not implement a C
3502 /// stack but uses a custom implementation of a hybrid stack/heap architecture.
3503/// (for more information see Eric Stenman's Ph.D. thesis:
3504/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3505///
3506/// CheckStack:
3507/// temp0 = sp - MaxStack
3508/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3509/// OldStart:
3510/// ...
3511/// IncStack:
3512/// call inc_stack # doubles the stack space
3513/// temp0 = sp - MaxStack
3514 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3515 void X86FrameLowering::adjustForHiPEPrologue(
3516 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3517 MachineFrameInfo &MFI = MF.getFrameInfo();
3518 DebugLoc DL;
3519
3520 // To support shrink-wrapping we would need to insert the new blocks
3521 // at the right place and update the branches to PrologueMBB.
3522 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3523
3524 // HiPE-specific values
3525 NamedMDNode *HiPELiteralsMD =
3526 MF.getMMI().getModule()->getNamedMetadata("hipe.literals");
3527 if (!HiPELiteralsMD)
3528 report_fatal_error(
3529 "Can't generate HiPE prologue without runtime parameters");
3530 const unsigned HipeLeafWords = getHiPELiteral(
3531 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3532 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3533 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3534 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3535 ? MF.getFunction().arg_size() - CCRegisteredArgs
3536 : 0;
3537 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
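// Worked example (hypothetical numbers): on x86-64, SlotSize == 8 and
// CCRegisteredArgs == 6, so a function with a 40-byte frame and 8 formal
// arguments has CallerStkArity == 2 and MaxStack = 40 + 2*8 + 8 = 64 before
// the callee scan below adds its contribution.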
3538
3539 assert(STI.isTargetLinux() &&
3540 "HiPE prologue is only supported on Linux operating systems.");
3541
3542 // Compute the largest caller's frame that is needed to fit the callees'
3543 // frames. This 'MaxStack' is computed from:
3544 //
3545 // a) the fixed frame size, which is the space needed for all spilled temps,
3546 // b) outgoing on-stack parameter areas, and
3547 // c) the minimum stack space this function needs to make available for the
3548 // functions it calls (a tunable ABI property).
3549 if (MFI.hasCalls()) {
3550 unsigned MoreStackForCalls = 0;
3551
3552 for (auto &MBB : MF) {
3553 for (auto &MI : MBB) {
3554 if (!MI.isCall())
3555 continue;
3556
3557 // Get callee operand.
3558 const MachineOperand &MO = MI.getOperand(0);
3559
3560 // Only take account of global function calls (no closures etc.).
3561 if (!MO.isGlobal())
3562 continue;
3563
3564 const Function *F = dyn_cast<Function>(MO.getGlobal());
3565 if (!F)
3566 continue;
3567
3568 // Do not update 'MaxStack' for primitive and built-in functions
3569 // (encoded with names either starting with "erlang."/"bif_", or containing
3570 // neither a "." (as in a simple <Module>.<Function>.<Arity>) nor an
3571 // "_" (as in the BIF "suspend_0")), as they are executed on another
3572 // stack.
3573 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3574 F->getName().find_first_of("._") == StringRef::npos)
3575 continue;
3576
3577 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3578 ? F->arg_size() - CCRegisteredArgs
3579 : 0;
3580 if (HipeLeafWords - 1 > CalleeStkArity)
3581 MoreStackForCalls =
3582 std::max(MoreStackForCalls,
3583 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3584 }
3585 }
3586 MaxStack += MoreStackForCalls;
3587 }
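// Continuing the example above: an eligible callee with CalleeStkArity == 0
// and HipeLeafWords == 24 contributes (24 - 1 - 0) * 8 = 184 bytes to
// MoreStackForCalls.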
3588
3589 // If the stack frame needed is larger than the guaranteed amount, runtime checks
3590 // and calls to the "inc_stack_0" BIF should be inserted in the assembly prologue.
3591 if (MaxStack > Guaranteed) {
3592 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3593 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3594
3595 for (const auto &LI : PrologueMBB.liveins()) {
3596 stackCheckMBB->addLiveIn(LI);
3597 incStackMBB->addLiveIn(LI);
3598 }
3599
3600 MF.push_front(incStackMBB);
3601 MF.push_front(stackCheckMBB);
3602
3603 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3604 unsigned LEAop, CMPop, CALLop;
3605 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3606 if (Is64Bit) {
3607 SPReg = X86::RSP;
3608 PReg = X86::RBP;
3609 LEAop = X86::LEA64r;
3610 CMPop = X86::CMP64rm;
3611 CALLop = X86::CALL64pcrel32;
3612 } else {
3613 SPReg = X86::ESP;
3614 PReg = X86::EBP;
3615 LEAop = X86::LEA32r;
3616 CMPop = X86::CMP32rm;
3617 CALLop = X86::CALLpcrel32;
3618 }
3619
3620 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3621 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3622 "HiPE prologue scratch register is live-in");
3623
3624 // Create new MBB for StackCheck:
3625 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3626 false, -MaxStack);
3627 // SPLimitOffset is in a fixed heap location (pointed by BP).
3628 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3629 PReg, false, SPLimitOffset);
3630 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3631 .addMBB(&PrologueMBB)
3632 .addImm(X86::COND_AE);
3633
3634 // Create new MBB for IncStack:
3635 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3636 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3637 false, -MaxStack);
3638 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3639 PReg, false, SPLimitOffset);
3640 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3641 .addMBB(incStackMBB)
3642 .addImm(X86::COND_LE);
3643
3644 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3645 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3646 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3647 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3648 }
3649#ifdef EXPENSIVE_CHECKS
3650 MF.verify();
3651#endif
3652}
3653
3654 bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3655 MachineBasicBlock::iterator MBBI,
3656 const DebugLoc &DL,
3657 int Offset) const {
3658 if (Offset <= 0)
3659 return false;
3660
3661 if (Offset % SlotSize)
3662 return false;
3663
3664 int NumPops = Offset / SlotSize;
3665 // This is only worth it if we have at most 2 pops.
3666 if (NumPops != 1 && NumPops != 2)
3667 return false;
3668
3669 // Handle only the trivial case where the adjustment directly follows
3670 // a call. This is the most common one, anyway.
3671 if (MBBI == MBB.begin())
3672 return false;
3673 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3674 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3675 return false;
3676
3677 unsigned Regs[2];
3678 unsigned FoundRegs = 0;
3679
3680 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3681 const MachineOperand &RegMask = Prev->getOperand(1);
3682
3683 auto &RegClass =
3684 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3685 // Try to find up to NumPops free registers.
3686 for (auto Candidate : RegClass) {
3687 // Poor man's liveness:
3688 // Since we're immediately after a call, any register that is clobbered
3689 // by the call and not defined by it can be considered dead.
3690 if (!RegMask.clobbersPhysReg(Candidate))
3691 continue;
3692
3693 // Don't clobber reserved registers
3694 if (MRI.isReserved(Candidate))
3695 continue;
3696
3697 bool IsDef = false;
3698 for (const MachineOperand &MO : Prev->implicit_operands()) {
3699 if (MO.isReg() && MO.isDef() &&
3700 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3701 IsDef = true;
3702 break;
3703 }
3704 }
3705
3706 if (IsDef)
3707 continue;
3708
3709 Regs[FoundRegs++] = Candidate;
3710 if (FoundRegs == (unsigned)NumPops)
3711 break;
3712 }
3713
3714 if (FoundRegs == 0)
3715 return false;
3716
3717 // If we found only one free register, but need two, reuse the same one twice.
3718 while (FoundRegs < (unsigned)NumPops)
3719 Regs[FoundRegs++] = Regs[0];
3720
3721 for (int i = 0; i < NumPops; ++i)
3722 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3723 Regs[i]);
3724
3725 return true;
3726}
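// Illustrative effect (registers assumed free via the call's regmask): under
// minsize, "call foo; addl $8, %esp" (the ADD is 3 bytes) becomes
// "call foo; popl %ecx; popl %edx" (1 byte per POP), saving a byte while
// producing the same net SP adjustment.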
3727
3728 MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3729 MachineFunction &MF, MachineBasicBlock &MBB,
3730 MachineBasicBlock::iterator I) const {
3731 bool reserveCallFrame = hasReservedCallFrame(MF);
3732 unsigned Opcode = I->getOpcode();
3733 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3734 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3735 uint64_t Amount = TII.getFrameSize(*I);
3736 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3737 I = MBB.erase(I);
3738 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3739
3740 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3741 // typically because the function is marked noreturn (abort, throw,
3742 // assert_fail, etc).
3743 if (isDestroy && blockEndIsUnreachable(MBB, I))
3744 return I;
3745
3746 if (!reserveCallFrame) {
3747 // If the stack pointer can be changed after prologue, turn the
3748 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3749 // adjcallstackdown instruction into 'add ESP, <amt>'
3750
3751 // We need to keep the stack aligned properly. To do this, we round the
3752 // amount of space needed for the outgoing arguments up to the next
3753 // alignment boundary.
3754 Amount = alignTo(Amount, getStackAlign());
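// For example, with a 16-byte stack alignment, an outgoing-argument area of
// 20 bytes is rounded up to 32 here so the SP adjustments emitted below
// preserve alignment.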
3755
3756 const Function &F = MF.getFunction();
3757 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3758 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3759
3760 // If we have any exception handlers in this function, and we adjust
3761 // the SP before calls, we may need to indicate this to the unwinder
3762 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3763 // Amount == 0, because the preceding function may have set a non-0
3764 // GNU_ARGS_SIZE.
3765 // TODO: We don't need to reset this between subsequent functions,
3766 // if it didn't change.
3767 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3768
3769 if (HasDwarfEHHandlers && !isDestroy &&
3770 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3771 BuildCFI(MBB, InsertPos, DL,
3772 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3773
3774 if (Amount == 0)
3775 return I;
3776
3777 // Factor out the amount that gets handled inside the sequence
3778 // (pushes of arguments for frame setup, callee pops for frame destroy).
3779 Amount -= InternalAmt;
3780
3781 // TODO: This is needed only if we require precise CFA.
3782 // If this is a callee-pop calling convention, emit a CFA adjust for
3783 // the amount the callee popped.
3784 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3785 BuildCFI(MBB, InsertPos, DL,
3786 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3787
3788 // Add Amount to SP to destroy a frame, or subtract to setup.
3789 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3790
3791 if (StackAdjustment) {
3792 // Merge with any previous or following adjustment instruction. Note: the
3793 // instructions merged with here do not have CFI, so their stack
3794 // adjustments do not feed into CfaAdjustment.
3795 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3796 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3797
3798 if (StackAdjustment) {
3799 if (!(F.hasMinSize() &&
3800 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3801 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3802 /*InEpilogue=*/false);
3803 }
3804 }
3805
3806 if (DwarfCFI && !hasFP(MF)) {
3807 // If we don't have FP, but need to generate unwind information,
3808 // we need to set the correct CFA offset after the stack adjustment.
3809 // How much we adjust the CFA offset depends on whether we're emitting
3810 // CFI only for EH purposes or for debugging. EH only requires the CFA
3811 // offset to be correct at each call site, while for debugging we want
3812 // it to be more precise.
3813
3814 int64_t CfaAdjustment = -StackAdjustment;
3815 // TODO: When not using precise CFA, we also need to adjust for the
3816 // InternalAmt here.
3817 if (CfaAdjustment) {
3818 BuildCFI(
3819 MBB, InsertPos, DL,
3820 MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3821 }
3822 }
3823
3824 return I;
3825 }
3826
3827 if (InternalAmt) {
3828 MachineBasicBlock::iterator CI = I;
3829 MachineBasicBlock::iterator B = MBB.begin();
3830 while (CI != B && !std::prev(CI)->isCall())
3831 --CI;
3832 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3833 }
3834
3835 return I;
3836}
3837
3838 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3839 assert(MBB.getParent() && "Block is not attached to a function!");
3840 const MachineFunction &MF = *MBB.getParent();
3841 if (!MBB.isLiveIn(X86::EFLAGS))
3842 return true;
3843
3844 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3845 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3846 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3847 const X86TargetLowering &TLI = *STI.getTargetLowering();
3848 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3849 return false;
3850
3852 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3853}
3854
3855 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3856 assert(MBB.getParent() && "Block is not attached to a function!");
3857
3858 // Win64 has strict requirements in terms of epilogue and we are
3859 // not taking a chance at messing with them.
3860 // I.e., unless this block is already an exit block, we can't use
3861 // it as an epilogue.
3862 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3863 return false;
3864
3865 // Swift async context epilogue has a BTR instruction that clobbers parts of
3866 // EFLAGS.
3867 const MachineFunction &MF = *MBB.getParent();
3868 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3869 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3870
3871 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3872 return true;
3873
3874 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3875 // clobbers the EFLAGS. Check that we do not need to preserve it;
3876 // otherwise, conservatively assume it is not
3877 // safe to insert the epilogue here.
3878 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3879}
3880
3881 bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3882 // If we may need to emit frameless compact unwind information, give
3883 // up as this is currently broken: PR25614.
3884 bool CompactUnwind =
3885 MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() !=
3886 nullptr;
3887 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3888 !CompactUnwind) &&
3889 // The lowering of segmented stack and HiPE only support entry
3890 // blocks as prologue blocks: PR26107. This limitation may be
3891 // lifted if we fix:
3892 // - adjustForSegmentedStacks
3893 // - adjustForHiPEPrologue
3894 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3895 !MF.shouldSplitStack();
3896}
3897
3898 MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3899 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3900 const DebugLoc &DL, bool RestoreSP) const {
3901 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3902 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3903 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3904 "restoring EBP/ESI on non-32-bit target");
3905
3906 MachineFunction &MF = *MBB.getParent();
3907 Register FramePtr = TRI->getFrameRegister(MF);
3908 Register BasePtr = TRI->getBaseRegister();
3909 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3910 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3911 MachineFrameInfo &MFI = MF.getFrameInfo();
3912
3913 // FIXME: Don't set FrameSetup flag in catchret case.
3914
3915 int FI = FuncInfo.EHRegNodeFrameIndex;
3916 int EHRegSize = MFI.getObjectSize(FI);
3917
3918 if (RestoreSP) {
3919 // MOV32rm -EHRegSize(%ebp), %esp
3920 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3921 X86::EBP, true, -EHRegSize)
3922 .setMIFlag(MachineInstr::FrameSetup);
3923 }
3924
3925 Register UsedReg;
3926 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3927 int EndOffset = -EHRegOffset - EHRegSize;
3928 FuncInfo.EHRegNodeEndOffset = EndOffset;
3929
3930 if (UsedReg == FramePtr) {
3931 // ADD $offset, %ebp
3932 unsigned ADDri = getADDriOpcode(false);
3933 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3934 .addReg(FramePtr)
3935 .addImm(EndOffset)
3936 .setMIFlag(MachineInstr::FrameSetup)
3937 ->getOperand(3)
3938 .setIsDead();
3939 assert(EndOffset >= 0 &&
3940 "end of registration object above normal EBP position!");
3941 } else if (UsedReg == BasePtr) {
3942 // LEA offset(%ebp), %esi
3943 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3944 FramePtr, false, EndOffset)
3945 .setMIFlag(MachineInstr::FrameSetup);
3946 // MOV32rm SavedEBPOffset(%esi), %ebp
3947 assert(X86FI->getHasSEHFramePtrSave());
3948 int Offset =
3949 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3950 .getFixed();
3951 assert(UsedReg == BasePtr);
3952 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3953 UsedReg, true, Offset)
3954 .setMIFlag(MachineInstr::FrameSetup);
3955 } else {
3956 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3957 }
3958 return MBBI;
3959}
3960
3961 int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3962 return TRI->getSlotSize();
3963}
3964
3965 Register
3966 X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3967 return StackPtr;
3968}
3969
3970 TargetFrameLowering::DwarfFrameBase
3971 X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
3972 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3973 Register FrameRegister = RI->getFrameRegister(MF);
3974 if (getInitialCFARegister(MF) == FrameRegister &&
3975 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
3976 DwarfFrameBase FrameBase;
3977 FrameBase.Kind = DwarfFrameBase::CFA;
3978 FrameBase.Location.Offset =
3979 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
3980 return FrameBase;
3981 }
3982
3983 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
3984}
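// Illustrative: for a function with a 24-byte frame on x86-64 taking the
// CFA branch above, the frame-base offset is -24 - 8 = -32.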
3985
3986namespace {
3987// Struct used by orderFrameObjects to help sort the stack objects.
3988struct X86FrameSortingObject {
3989 bool IsValid = false; // true if we care about this Object.
3990 unsigned ObjectIndex = 0; // Index of Object into MFI list.
3991 unsigned ObjectSize = 0; // Size of Object in bytes.
3992 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3993 unsigned ObjectNumUses = 0; // Object static number of uses.
3994};
3995
3996// The comparison function we use for std::sort to order our local
3997// stack symbols. The current algorithm is to use an estimated
3998// "density". This takes into consideration the size and number of
3999// uses each object has in order to roughly minimize code size.
4000// So, for example, an object of size 16B that is referenced 5 times
4001// will get higher priority than 4 4B objects referenced 1 time each.
4002// It's not perfect and we may be able to squeeze a few more bytes out of
4003// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4004// fringe end can have special consideration, given their size is less
4005// important, etc.), but the algorithmic complexity grows too much to be
4006// worth the extra gains we get. This gets us pretty close.
4007// The final order leaves us with objects with highest priority going
4008// at the end of our list.
4009struct X86FrameSortingComparator {
4010 inline bool operator()(const X86FrameSortingObject &A,
4011 const X86FrameSortingObject &B) const {
4012 uint64_t DensityAScaled, DensityBScaled;
4013
4014 // For consistency in our comparison, all invalid objects are placed
4015 // at the end. This also allows us to stop walking when we hit the
4016 // first invalid item after it's all sorted.
4017 if (!A.IsValid)
4018 return false;
4019 if (!B.IsValid)
4020 return true;
4021
4022 // The density is calculated by doing :
4023 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4024 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4025 // Since this approach may cause inconsistencies in
4026 // the floating point <, >, == comparisons, depending on the floating
4027 // point model with which the compiler was built, we're going
4028 // to scale both sides by multiplying with
4029 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4030 // the division and, with it, the need for any floating point
4031 // arithmetic.
4032 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4033 static_cast<uint64_t>(B.ObjectSize);
4034 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4035 static_cast<uint64_t>(A.ObjectSize);
4036
4037 // If the two densities are equal, prioritize highest alignment
4038 // objects. This allows for similar alignment objects
4039 // to be packed together (given the same density).
4040 // There's room for improvement here, also, since we can pack
4041 // similar alignment (different density) objects next to each
4042 // other to save padding. This will also require further
4043 // complexity/iterations, and the overall gain isn't worth it,
4044 // in general. Something to keep in mind, though.
4045 if (DensityAScaled == DensityBScaled)
4046 return A.ObjectAlignment < B.ObjectAlignment;
4047
4048 return DensityAScaled < DensityBScaled;
4049 }
4050};
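// Worked example: for A = {NumUses = 5, Size = 16} and B = {NumUses = 1,
// Size = 4}, DensityAScaled = 5 * 4 = 20 and DensityBScaled = 1 * 16 = 16,
// so the comparator returns false and the denser A sorts after B, toward the
// high-priority end of the list.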
4051} // namespace
4052
4053// Order the symbols in the local stack.
4054// We want to place the local stack objects in some sort of sensible order.
4055// The heuristic we use is to try and pack them according to static number
4056// of uses and size of object in order to minimize code size.
4057 void X86FrameLowering::orderFrameObjects(
4058 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4059 const MachineFrameInfo &MFI = MF.getFrameInfo();
4060
4061 // Don't waste time if there's nothing to do.
4062 if (ObjectsToAllocate.empty())
4063 return;
4064
4065 // Create an array of all MFI objects. We won't need all of these
4066 // objects, but we're going to create a full array of them to make
4067 // it easier to index into when we're counting "uses" down below.
4068 // We want to be able to easily/cheaply access an object by simply
4069 // indexing into it, instead of having to search for it every time.
4070 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4071
4072 // Walk the objects we care about and mark them as such in our working
4073 // struct.
4074 for (auto &Obj : ObjectsToAllocate) {
4075 SortingObjects[Obj].IsValid = true;
4076 SortingObjects[Obj].ObjectIndex = Obj;
4077 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4078 // Set the size.
4079 int ObjectSize = MFI.getObjectSize(Obj);
4080 if (ObjectSize == 0)
4081 // Variable size. Just use 4.
4082 SortingObjects[Obj].ObjectSize = 4;
4083 else
4084 SortingObjects[Obj].ObjectSize = ObjectSize;
4085 }
4086
4087 // Count the number of uses for each object.
4088 for (auto &MBB : MF) {
4089 for (auto &MI : MBB) {
4090 if (MI.isDebugInstr())
4091 continue;
4092 for (const MachineOperand &MO : MI.operands()) {
4093 // Check to see if it's a local stack symbol.
4094 if (!MO.isFI())
4095 continue;
4096 int Index = MO.getIndex();
4097 // Check to see if it falls within our range, and is tagged
4098 // to require ordering.
4099 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4100 SortingObjects[Index].IsValid)
4101 SortingObjects[Index].ObjectNumUses++;
4102 }
4103 }
4104 }
4105
4106 // Sort the objects using X86FrameSortingComparator (see its comment for
4107 // info).
4108 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4109
4110 // Now modify the original list to represent the final order that
4111 // we want. The order will depend on whether we're going to access them
4112 // from the stack pointer or the frame pointer. For SP, the list should
4113 // end up with the END containing objects that we want with smaller offsets.
4114 // For FP, it should be flipped.
4115 int i = 0;
4116 for (auto &Obj : SortingObjects) {
4117 // All invalid items are sorted at the end, so it's safe to stop.
4118 if (!Obj.IsValid)
4119 break;
4120 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4121 }
4122
4123 // Flip it if we're accessing off of the FP.
4124 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4125 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4126}
4127
4128 unsigned
4129 X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4130 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4131 unsigned Offset = 16;
4132 // RBP is immediately pushed.
4133 Offset += SlotSize;
4134 // All callee-saved registers are then pushed.
4135 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4136 // Every funclet allocates enough stack space for the largest outgoing call.
4137 Offset += getWinEHFuncletFrameSize(MF);
4138 return Offset;
4139}
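// Worked example (hypothetical sizes): on x86-64 (SlotSize == 8), with 16
// bytes of callee-saved registers and a 32-byte funclet frame, the parent
// frame offset is 16 + 8 + 16 + 32 = 72.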
4140
4141 void X86FrameLowering::processFunctionBeforeFrameFinalized(
4142 MachineFunction &MF, RegScavenger *RS) const {
4143 // Mark the function as not having WinCFI. We will set it back to true in
4144 // emitPrologue if it gets called and emits CFI.
4145 MF.setHasWinCFI(false);
4146
4147 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4148 // aligned. The format doesn't support misaligned stack adjustments.
4149 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4150 MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
4151
4152 // If this function isn't doing Win64-style C++ EH, we don't need to do
4153 // anything.
4154 if (STI.is64Bit() && MF.hasEHFunclets() &&
4155 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4156 EHPersonality::MSVC_CXX) {
4157 adjustFrameForMsvcCxxEh(MF);
4158 }
4159}
4160
4161void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4162 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4163 // relative to RSP after the prologue. Find the offset of the last fixed
4164 // object, so that we can allocate a slot immediately following it. If there
4165 // were no fixed objects, use offset -SlotSize, which is immediately after the
4166 // return address. Fixed objects have negative frame indices.
4167 MachineFrameInfo &MFI = MF.getFrameInfo();
4168 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4169 int64_t MinFixedObjOffset = -SlotSize;
4170 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4171 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4172
4173 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4174 for (WinEHHandlerType &H : TBME.HandlerArray) {
4175 int FrameIndex = H.CatchObj.FrameIndex;
4176 if (FrameIndex != INT_MAX) {
4177 // Ensure alignment.
4178 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4179 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4180 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4181 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4182 }
4183 }
4184 }
4185
4186 // Ensure alignment.
4187 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
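// For example, MinFixedObjOffset == -20 becomes -20 - (20 % 8) = -24 here,
// so the UnwindHelp slot below lands at -32 and stays 8-byte aligned.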
4188 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4189 int UnwindHelpFI =
4190 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4191 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4192
4193 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4194 // other frame setup instructions.
4195 MachineBasicBlock &MBB = MF.front();
4196 auto MBBI = MBB.begin();
4197 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4198 ++MBBI;
4199
4200 DebugLoc DL = MBB.findDebugLoc(MBBI);
4201 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4202 UnwindHelpFI)
4203 .addImm(-2);
4204}
4205
4206 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
4207 MachineFunction &MF, RegScavenger *RS) const {
4208 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4209
4210 if (STI.is32Bit() && MF.hasEHFunclets())
4211 restoreWinEHStackPointersInParent(MF);
4212 // We have emitted the prolog and epilog, so the stack-pointer-saving
4213 // instruction is no longer needed.
4214 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4215 MI->eraseFromParent();
4216 X86FI->setStackPtrSaveMI(nullptr);
4217 }
4218}
4219
4220 void X86FrameLowering::restoreWinEHStackPointersInParent(
4221 MachineFunction &MF) const {
4222 // 32-bit functions have to restore stack pointers when control is transferred
4223 // back to the parent function. These blocks are identified as eh pads that
4224 // are not funclet entries.
4225 bool IsSEH = isAsynchronousEHPersonality(
4226 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4227 for (MachineBasicBlock &MBB : MF) {
4228 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4229 if (NeedsRestore)
4230 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4231 /*RestoreSP=*/IsSEH);
4232 }
4233}
unsigned const MachineRegisterInfo * MRI
static bool isFuncletReturnInstr(const MachineInstr &MI)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const uint64_t kSplitStackAvailable
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Module.h This file contains the declarations for the Module class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:157
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
reverse_iterator rbegin() const
Definition: ArrayRef.h:156
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:868
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1934
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
size_t arg_size() const
Definition: Function.h:864
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:673
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:225
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:690
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:799
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:548
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:583
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:556
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:616
OpType getOperation() const
Definition: MCDwarf.h:658
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:541
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:564
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:647
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:653
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1067
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:610
Machine Value Type.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
void setOffsetAdjustment(int Adj)
Set the correction for frame offsets.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
MachineModuleInfo & getMMI() const
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:572
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
const MCContext & getContext() const
const Module * getModule() const
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:262
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:586
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
A tuple of MDNodes.
Definition: Metadata.h:1729
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1381
unsigned getNumOperands() const
Definition: Metadata.cpp:1377
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:64
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static constexpr size_t npos
Definition: StringRef.h:52
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:624
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:558
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:450
Value * getValue() const
Definition: Metadata.h:490
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value i.e.
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value i.e.
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const
Check the instruction before/after the passed instruction.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
Definition: X86InstrInfo.h:215
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
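A sketch of how frame lowering might consult this per-function state when pairing pushes (the register and size are illustrative, not this file's actual policy):
  static void markPush2Pop2(MachineFunction &MF) {
    auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    X86FI->addCandidateForPush2Pop2(X86::R14);
    if (X86FI->isCandidateForPush2Pop2(X86::R14))
      X86FI->setCalleeSavedFrameSize(16); // two 8-byte slots, illustrative
  }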
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" ...
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns the physical register used as the frame pointer.
Register getBaseRegister() const
bool isOSWindows() const
Definition: X86Subtarget.h:334
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:125
bool isTargetDragonFly() const
Definition: X86Subtarget.h:294
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:312
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:180
bool isTargetDarwin() const
Definition: X86Subtarget.h:292
bool isTargetWin64() const
Definition: X86Subtarget.h:336
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:185
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP,...
Definition: X86Subtarget.h:398
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:316
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:129
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:349
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:293
bool isTargetNaCl64() const
Definition: X86Subtarget.h:308
bool isTargetWin32() const
Definition: X86Subtarget.h:338
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:230
bool isTargetLinux() const
Definition: X86Subtarget.h:302
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
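These queries live on X86TargetLowering and are reached through the subtarget; a hedged sketch of how prologue emission might branch on them:
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  if (TLI.hasStackProbeSymbol(MF) && !TLI.hasInlineStackProbe(MF)) {
    StringRef ProbeFn = TLI.getStackProbeSymbolName(MF); // e.g. "__chkstk" on Win64
    unsigned ProbeSize = TLI.getStackProbeSize(MF);      // commonly 4096
    (void)ProbeFn; (void)ProbeSize;
  }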
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:403
CallingConvention
Definition: Dwarf.h:738
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
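A hedged sketch combining these builders in the style of this file's spill code (the opcode and registers are illustrative):
  static void storeExamples(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                            const DebugLoc &DL, const X86InstrInfo &TII, int FI) {
    // Store RAX to the abstract slot behind frame index FI (resolved later).
    addFrameReference(BuildMI(MBB, MI, DL, TII.get(X86::MOV64mr)), FI)
        .addReg(X86::RAX);
    // Store RAX to the concrete address [RSP + 8].
    addRegOffset(BuildMI(MBB, MI, DL, TII.get(X86::MOV64mr)), X86::RSP,
                 /*isKill=*/false, 8)
        .addReg(X86::RAX);
  }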
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
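Each helper turns a bool into the matching RegState flag for an operand, and the flags compose with |; for instance (a sketch mirroring the push-based spill pattern):
  BuildMI(MBB, MI, DL, TII.get(X86::PUSH64r))
      .addReg(X86::RBX, getKillRegState(true)); // spill: last use of RBX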
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
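Worked examples of the two alignment helpers (assuming llvm/Support/Alignment.h):
  uint64_t Padded = alignTo(13, Align(16)); // -> 16, next multiple of 16
  bool Ok = isAligned(Align(16), Padded);   // -> true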
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
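A self-contained sketch of the range-based wrappers (the values are illustrative):
  SmallVector<int, 4> Offsets = {8, 16, 24};
  bool AllPositive = all_of(Offsets, [](int O) { return O > 0; });  // true
  auto Count = count_if(Offsets, [](int O) { return O > 8; });      // 2
  auto It = find_if(Offsets, [](int O) { return O % 16 == 0; });    // -> 16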
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
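This file includes llvm/Support/LEB128.h for these encoders; a tiny worked example of both (using the classic 624485 test value):
  SmallString<8> Buf;
  raw_svector_ostream OS(Buf);
  unsigned N = encodeULEB128(624485, OS); // writes 0xE5 0x8E 0x26, N == 3
  encodeSLEB128(-2, OS);                  // appends the single byte 0x7E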
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:483
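For example (the skew variant keeps a residue class):
  uint64_t A = alignDown(13, 8);    // -> 8, largest multiple of 8 <= 13
  uint64_t B = alignDown(13, 8, 1); // -> 9, largest value <= 13 that is 1 mod 8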
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
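For instance, after frame lowering splices in new prologue blocks (a sketch; the block set is illustrative):
  fullyRecomputeLiveIns({&PrologMBB, &ContinueMBB});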
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
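A hedged sketch of building a memory operand for a store to fixed stack slot FI (the size and alignment are illustrative):
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI),
      MachineMemOperand::MOStore, /*Size=*/8, Align(8));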
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
union llvm::TargetFrameLowering::DwarfFrameBase::Location (anonymous union)
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76