//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}
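
// For example (illustrative only): getMOVriOpcode(true, 0xFFFFFFFF) selects
// MOV32ri64 (a 32-bit move that implicitly zero-extends into the 64-bit
// register), getMOVriOpcode(true, -8) selects MOV64ri32 (sign-extended 32-bit
// immediate), and getMOVriOpcode(true, 0x123456789) needs the full 10-byte
// MOV64ri (movabsq).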

// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching
// PUSH and POP instructions, without going through memory or through the
// training loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more
// efficient memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}
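
// As a sketch, a prologue/epilogue spilling two CSRs with these helpers would
// emit something like (operand order illustrative):
//   push2p %r15, %r14    ; one PUSH2P covers two 8-byte slots
//   ...
//   pop2p  %r14, %r15    ; the matching POP2P restores both registers
// On subtargets without PPX, the plain PUSH2/POP2 (or single PUSH64r/POP64r)
// forms are used instead.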

static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    unsigned Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case, if the eflags is live-in of the region
/// composed by the terminators or live-out of that region, without
/// being defined by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an EFLAGS value that is not defined by an
      // earlier terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}

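// For instance (illustrative): a block whose terminators are
//   JCC_1 %bb.1, 4, implicit $eflags
//   JMP_1 %bb.2
// reads EFLAGS before any terminator defines it, so the function above
// returns true and the epilogue must avoid ADD/SUB (which clobber EFLAGS)
// when adjusting the stack pointer there.
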
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);

    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}

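// As a size optimization (sketch): emitSPUpdate(MBB, MBBI, DL, -8, false) on
// x86-64 emits a single "pushq %rax" (the pushed value is undef) rather than
// "subq $8, %rsp", saving encoding bytes for slot-sized adjustments.
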
MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

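// Sketch: BuildStackAdjustment(MBB, MBBI, DL, -24, false) emits either
//   leaq -24(%rsp), %rsp    ; when LEA is preferred or EFLAGS is live
// or
//   subq $24, %rsp          ; otherwise (the EFLAGS def is marked dead)
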
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that an ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}

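// Merging sketch: if the prologue has just emitted "subq $16, %rsp" and a
// later pseudo wants 8 more bytes, calling mergeSPUpdates(..., true) erases
// the SUB (and its lone CFA-offset CFI update, if any) and returns -16, which
// the caller folds into a single "subq $24, %rsp".
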
void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        // | retaddr |
        // | ebp     |
        // |         |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits are
  // left between the unaligned rsp and current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small it fits within a page, there's nothing to do.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a Page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust Dwarf CFA offset here; the last position of the stack
  // has been defined.
}

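// Unrolled expansion sketch: for a 12KiB allocation with 4KiB probe pages and
// no extra alignment, the block above emits roughly
//   subq $4096, %rsp ; movl $0, (%rsp)   <- allocate + touch page 1
//   subq $4096, %rsp ; movl $0, (%rsp)   <- allocate + touch page 2
//   subq $4096, %rsp                     <- tail needs no probe
// (plus .cfi_adjust_cfa_offset updates when no frame pointer is available).
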
void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);

  // save loop bound
  {
    const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
        .addReg(FinalStackProbed)
        .addImm(BoundOffset)
        .setMIFlag(MachineInstr::FrameSetup);

    // while in the loop, use loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d, we must use r11 instead
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // allocate a page
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // handle tail
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // after the loop, switch back to stack pointer for CFI
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp, we must use rsp instead
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update Live In information
  fullyRecomputeLiveIns({tailMBB, testMBB});
}

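// Loop expansion sketch (64-bit, no frame pointer): for a large allocation
// the code above emits roughly
//   movq %rsp, %r11
//   subq $BoundOffset, %r11        ; loop-invariant bound, also used for CFI
// .LtestMBB:
//   subq $4096, %rsp               ; allocate one page
//   movl $0, (%rsp)                ; touch it
//   cmpq %r11, %rsp
//   jne  .LtestMBB
//   subq $TailOffset, %rsp         ; remainder smaller than a page
// Register names and the exact condition code are illustrative.
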
void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if
    // both need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  ContinueMBB->addLiveIn(SizeReg);
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}

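// Worked examples: calculateSetFPREG(200) clamps to 128 and returns 128;
// calculateSetFPREG(90) keeps 90 and rounds down to 80 (90 & -16). Both
// results satisfy the 16-byte alignment UWOP_SET_FPREG requires.
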
// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}

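// For example (illustrative): a "stackrealign" function that makes calls and
// whose largest local only needs 8-byte alignment still gets the 16-byte ABI
// stack alignment here, while a 32-bit x86-interrupt handler is always
// rounded up to at least 16 bytes.
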
void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in worst case) less than StackProbeSize bytes
  // are not probed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump to the footer if StackPtr < FinalStackProbed
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // setup loop body
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // cmp with stack pointer bound
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump back while FinalStackProbed < StackPtr
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}

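// Sketch of the expansion above (64-bit): the aligned target SP is computed
// into %r11 ("andq $-MaxAlign, %r11"); if it differs from %rsp, the loop
// repeatedly does "subq $4096, %rsp; movl $0, (%rsp)" until %rsp passes %r11,
// and the footer copies %r11 into %rsp and touches the final page. This keeps
// every page between the old and new SP probed.
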
bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
        .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

1527 MachineBasicBlock &MBB) const {
1528 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1529 "MF used frame lowering for wrong subtarget");
1531 MachineFrameInfo &MFI = MF.getFrameInfo();
1532 const Function &Fn = MF.getFunction();
1534 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1535 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1536 bool IsFunclet = MBB.isEHFuncletEntry();
1538 if (Fn.hasPersonalityFn())
1539 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1540 bool FnHasClrFunclet =
1541 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1542 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1543 bool HasFP = hasFP(MF);
1544 bool IsWin64Prologue = isWin64Prologue(MF);
1545 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1546 // FIXME: Emit FPO data for EH funclets.
1547 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1549 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1550 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1552 const Register MachineFramePtr =
1554 : FramePtr;
1555 Register BasePtr = TRI->getBaseRegister();
1556 bool HasWinCFI = false;
1557
1558 // Debug location must be unknown since the first debug location is used
1559 // to determine the end of the prologue.
1560 DebugLoc DL;
1561 Register ArgBaseReg;
1562
1563 // Emit extra prolog for argument stack slot reference.
1564 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1565 // MI is lea instruction that created in X86ArgumentStackSlotPass.
1566 // Creat extra prolog for stack realignment.
1567 ArgBaseReg = MI->getOperand(0).getReg();
1568 // leal 4(%esp), %basereg
1569 // .cfi_def_cfa %basereg, 0
1570 // andl $-128, %esp
1571 // pushl -4(%basereg)
1572 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1573 ArgBaseReg)
1575 .addImm(1)
1576 .addUse(X86::NoRegister)
1578 .addUse(X86::NoRegister)
1580 if (NeedsDwarfCFI) {
1581 // .cfi_def_cfa %basereg, 0
1582 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1583 BuildCFI(MBB, MBBI, DL,
1584 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1586 }
1587 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1588 int64_t Offset = -(int64_t)SlotSize;
1589 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1590 .addReg(ArgBaseReg)
1591 .addImm(1)
1592 .addReg(X86::NoRegister)
1593 .addImm(Offset)
1594 .addReg(X86::NoRegister)
1596 }
1597
1598 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1599 // tail call.
1600 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1601 if (TailCallArgReserveSize && IsWin64Prologue)
1602 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1603
1604 const bool EmitStackProbeCall =
1606 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1607
1608 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1612 // The special symbol below is absolute and has a *value* suitable to be
1613 // combined with the frame pointer directly.
1614 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1615 .addUse(MachineFramePtr)
1616 .addUse(X86::RIP)
1617 .addImm(1)
1618 .addUse(X86::NoRegister)
1619 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1621 .addUse(X86::NoRegister);
1622 break;
1623 }
1624 [[fallthrough]];
1625
1627 assert(
1628 !IsWin64Prologue &&
1629 "win64 prologue does not set the bit 60 in the saved frame pointer");
1630 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1631 .addUse(MachineFramePtr)
1632 .addImm(60)
1634 break;
1635
1637 break;
1638 }
1639 }
1640
1641 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1642 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1643 // stack alignment.
1645 Fn.arg_size() == 2) {
1646 StackSize += 8;
1647 MFI.setStackSize(StackSize);
1648
1649 // Update the stack pointer by pushing a register. This is the instruction
1650 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1651 // Hard-coding the update to a push avoids emitting a second
1652 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1653 // probing isn't needed anyways for an 8-byte update.
1654 // Pushing a register leaves us in a similar situation to a regular
1655 // function call where we know that the address at (rsp-8) is writeable.
1656 // That way we avoid any off-by-ones with stack probing for additional
1657 // stack pointer updates later on.
1658 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1659 .addReg(X86::RAX, RegState::Undef)
1661 }
1662
1663 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1664 // function, and use up to 128 bytes of stack space, don't have a frame
1665 // pointer, calls, or dynamic alloca then we do not need to adjust the
1666 // stack pointer (we fit in the Red Zone). We also check that we don't
1667 // push and pop from the stack.
1668 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1669 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1670 !MFI.adjustsStack() && // No calls.
1671 !EmitStackProbeCall && // No stack probes.
1672 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1673 !MF.shouldSplitStack()) { // Regular stack
1674 uint64_t MinSize =
1676 if (HasFP)
1677 MinSize += SlotSize;
1678 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1679 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1680 MFI.setStackSize(StackSize);
1681 }
1682
1683 // Insert stack pointer adjustment for later moving of return addr. Only
1684 // applies to tail call optimized functions where the callee argument stack
1685 // size is bigger than the callers.
1686 if (TailCallArgReserveSize != 0) {
1687 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1688 /*InEpilogue=*/false)
1690 }
1691
1692 // Mapping for machine moves:
1693 //
1694 // DST: VirtualFP AND
1695 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1696 // ELSE => DW_CFA_def_cfa
1697 //
1698 // SRC: VirtualFP AND
1699 // DST: Register => DW_CFA_def_cfa_register
1700 //
1701 // ELSE
1702 // OFFSET < 0 => DW_CFA_offset_extended_sf
1703 // REG < 64 => DW_CFA_offset + Reg
1704 // ELSE => DW_CFA_offset_extended
1705
1706 uint64_t NumBytes = 0;
1707 int stackGrowth = -SlotSize;
1708
1709 // Find the funclet establisher parameter
1710 Register Establisher = X86::NoRegister;
1711 if (IsClrFunclet)
1712 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1713 else if (IsFunclet)
1714 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1715
1716 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1717 // Immediately spill establisher into the home slot.
1718 // The runtime cares about this.
1719 // MOV64mr %rdx, 16(%rsp)
1720 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1721 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1722 .addReg(Establisher)
1724 MBB.addLiveIn(Establisher);
1725 }
1726
1727 if (HasFP) {
1728 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1729
1730 // Calculate required stack adjustment.
1731 uint64_t FrameSize = StackSize - SlotSize;
1732 NumBytes =
1733 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1734
1735 // Callee-saved registers are pushed on stack before the stack is realigned.
1736 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1737 NumBytes = alignTo(NumBytes, MaxAlign);
1738
1739 // Save EBP/RBP into the appropriate stack slot.
1740 BuildMI(MBB, MBBI, DL,
1742 .addReg(MachineFramePtr, RegState::Kill)
1744
1745 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1746 // Mark the place where EBP/RBP was saved.
1747 // Define the current CFA rule to use the provided offset.
1748 assert(StackSize);
1749 BuildCFI(MBB, MBBI, DL,
1751 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1753
1754 // Change the rule for the FramePtr to be an "offset" rule.
1755 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1756 BuildCFI(MBB, MBBI, DL,
1757 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1758 2 * stackGrowth -
1759 (int)TailCallArgReserveSize),
1761 }
1762
1763 if (NeedsWinCFI) {
1764 HasWinCFI = true;
1765 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1768 }
1769
1770 if (!IsFunclet) {
1771 if (X86FI->hasSwiftAsyncContext()) {
1772 assert(!IsWin64Prologue &&
1773 "win64 prologue does not store async context right below rbp");
1774 const auto &Attrs = MF.getFunction().getAttributes();
1775
1776 // Before we update the live frame pointer we have to ensure there's a
1777 // valid (or null) asynchronous context in its slot just before FP in
1778 // the frame record, so store it now.
1779 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1780 // We have an initial context in r14, store it just before the frame
1781 // pointer.
1782 MBB.addLiveIn(X86::R14);
1783 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1784 .addReg(X86::R14)
1785 .setMIFlag(MachineInstr::FrameSetup);
1786 } else {
1787 // No initial context, store null so that there's no pointer that
1788 // could be misused.
1789 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1790 .addImm(0)
1791 .setMIFlag(MachineInstr::FrameSetup);
1792 }
1793
1794 if (NeedsWinCFI) {
1795 HasWinCFI = true;
1796 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1797 .addImm(X86::R14)
1798 .setMIFlag(MachineInstr::FrameSetup);
1799 }
1800
1801 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1802 .addUse(X86::RSP)
1803 .addImm(1)
1804 .addUse(X86::NoRegister)
1805 .addImm(8)
1806 .addUse(X86::NoRegister)
1807 .setMIFlag(MachineInstr::FrameSetup);
1808 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1809 .addUse(X86::RSP)
1810 .addImm(8)
1811 .setMIFlag(MachineInstr::FrameSetup);
1812 }
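// For illustration, the resulting frame record in the Swift async case
// (assuming SlotSize == 8, stack growing down):
//
//   [rbp + 8]   return address
//   [rbp + 0]   saved rbp
//   [rbp - 8]   async context (r14 or null)
//   [rbp - 16]  alignment slot             <- rsp after the sub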
1813
1814 if (!IsWin64Prologue && !IsFunclet) {
1815 // Update EBP with the new base value.
1816 if (!X86FI->hasSwiftAsyncContext())
1817 BuildMI(MBB, MBBI, DL,
1818 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1819 FramePtr)
1820 .addReg(StackPtr)
1821 .setMIFlag(MachineInstr::FrameSetup);
1822
1823 if (NeedsDwarfCFI) {
1824 if (ArgBaseReg.isValid()) {
1825 SmallString<64> CfaExpr;
1826 CfaExpr.push_back(dwarf::DW_CFA_expression);
1827 uint8_t buffer[16];
1828 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1829 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1830 CfaExpr.push_back(2);
1831 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1832 CfaExpr.push_back(0);
1833 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1834 BuildCFI(MBB, MBBI, DL,
1835 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1836 MachineInstr::FrameSetup);
1837 } else {
1838 // Mark effective beginning of when frame pointer becomes valid.
1839 // Define the current CFA to use the EBP/RBP register.
1840 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1841 BuildCFI(
1842 MBB, MBBI, DL,
1843 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1844 MachineInstr::FrameSetup);
1845 }
1846 }
1847
1848 if (NeedsWinFPO) {
1849 // .cv_fpo_setframe $FramePtr
1850 HasWinCFI = true;
1851 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1852 .addImm(FramePtr)
1853 .addImm(0)
1854 .setMIFlag(MachineInstr::FrameSetup);
1855 }
1856 }
1857 }
1858 } else {
1859 assert(!IsFunclet && "funclets without FPs not yet implemented");
1860 NumBytes =
1861 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1862 }
1863
1864 // Update the offset adjustment, which is mainly used by codeview to translate
1865 // from ESP to VFRAME relative local variable offsets.
1866 if (!IsFunclet) {
1867 if (HasFP && TRI->hasStackRealignment(MF))
1868 MFI.setOffsetAdjustment(-NumBytes);
1869 else
1870 MFI.setOffsetAdjustment(-StackSize);
1871 }
1872
1873 // For EH funclets, only allocate enough space for outgoing calls. Save the
1874 // NumBytes value that we would've used for the parent frame.
1875 unsigned ParentFrameNumBytes = NumBytes;
1876 if (IsFunclet)
1877 NumBytes = getWinEHFuncletFrameSize(MF);
1878
1879 // Skip the callee-saved push instructions.
1880 bool PushedRegs = false;
1881 int StackOffset = 2 * stackGrowth;
1882 MachineBasicBlock::iterator LastCSPush = MBBI;
1883 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1884 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1885 return false;
1886 unsigned Opc = MBBI->getOpcode();
1887 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1888 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1889 };
1890
1891 while (IsCSPush(MBBI)) {
1892 PushedRegs = true;
1893 Register Reg = MBBI->getOperand(0).getReg();
1894 LastCSPush = MBBI;
1895 ++MBBI;
1896 unsigned Opc = LastCSPush->getOpcode();
1897
1898 if (!HasFP && NeedsDwarfCFI) {
1899 // Mark callee-saved push instruction.
1900 // Define the current CFA rule to use the provided offset.
1901 assert(StackSize);
1902 // Compared to push, push2 introduces more stack offset (one more
1903 // register).
1904 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1905 StackOffset += stackGrowth;
1906 BuildCFI(MBB, MBBI, DL,
1907 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1908 MachineInstr::FrameSetup);
1909 StackOffset += stackGrowth;
1910 }
1911
1912 if (NeedsWinCFI) {
1913 HasWinCFI = true;
1914 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1915 .addImm(Reg)
1916 .setMIFlag(MachineInstr::FrameSetup);
1917 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1918 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1919 .addImm(LastCSPush->getOperand(1).getReg())
1920 .setMIFlag(MachineInstr::FrameSetup);
1921 }
1922 }
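// For illustration: in a frameless function each plain push advances the
// CFA offset by SlotSize, while push2/push2p stores two registers at once
// and must advance it by 2 * SlotSize, e.g. on x86-64:
//
//   pushq %rbx            ; .cfi_def_cfa_offset 16
//   push2 %r15, %r14      ; .cfi_def_cfa_offset 32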
1923
1924 // Realign stack after we pushed callee-saved registers (so that we'll be
1925 // able to calculate their offsets from the frame pointer).
1926 // Don't do this for Win64, it needs to realign the stack after the prologue.
1927 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1928 !ArgBaseReg.isValid()) {
1929 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1930 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1931
1932 if (NeedsWinCFI) {
1933 HasWinCFI = true;
1934 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1935 .addImm(MaxAlign)
1936 .setMIFlag(MachineInstr::FrameSetup);
1937 }
1938 }
1939
1940 // If there is a SUB32ri of ESP immediately before this instruction, merge
1941 // the two. This can be the case when tail call elimination is enabled and
1942 // the callee has more arguments than the caller.
1943 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1944
1945 // Adjust stack pointer: ESP -= numbytes.
1946
1947 // Windows and cygwin/mingw require a prologue helper routine when allocating
1948 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1949 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1950 // stack and adjust the stack pointer in one go. The 64-bit version of
1951 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1952 // responsible for adjusting the stack pointer. Touching the stack at 4K
1953 // increments is necessary to ensure that the guard pages used by the OS
1954 // virtual memory manager are allocated in correct sequence.
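// For illustration: on x86-64 Windows a 16 KiB frame is typically lowered
// to roughly
//
//   movl $16384, %eax
//   callq __chkstk          ; probes the pages, leaves rsp untouched
//   subq %rax, %rsp         ; the prologue performs the actual adjustment
//
// while the 32-bit __chkstk/__alloca helpers both probe and move the
// stack pointer themselves.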
1955 uint64_t AlignedNumBytes = NumBytes;
1956 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1957 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1958 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1959 assert(!X86FI->getUsesRedZone() &&
1960 "The Red Zone is not accounted for in stack probes");
1961
1962 // Check whether EAX is livein for this block.
1963 bool isEAXAlive = isEAXLiveIn(MBB);
1964
1965 if (isEAXAlive) {
1966 if (Is64Bit) {
1967 // Save RAX
1968 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1969 .addReg(X86::RAX, RegState::Kill)
1970 .setMIFlag(MachineInstr::FrameSetup);
1971 } else {
1972 // Save EAX
1973 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1974 .addReg(X86::EAX, RegState::Kill)
1975 .setMIFlag(MachineInstr::FrameSetup);
1976 }
1977 }
1978
1979 if (Is64Bit) {
1980 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1981 // Function prologue is responsible for adjusting the stack pointer.
1982 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1983 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1984 .addImm(Alloc)
1985 .setMIFlag(MachineInstr::FrameSetup);
1986 } else {
1987 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1988 // We'll also use 4 already allocated bytes for EAX.
1989 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1990 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1991 .setMIFlag(MachineInstr::FrameSetup);
1992 }
1993
1994 // Call __chkstk, __chkstk_ms, or __alloca.
1995 emitStackProbe(MF, MBB, MBBI, DL, true);
1996
1997 if (isEAXAlive) {
1998 // Restore RAX/EAX
1999 MachineInstr *MI;
2000 if (Is64Bit)
2001 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2002 StackPtr, false, NumBytes - 8);
2003 else
2004 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2005 StackPtr, false, NumBytes - 4);
2006 MI->setFlag(MachineInstr::FrameSetup);
2007 MBB.insert(MBBI, MI);
2008 }
2009 } else if (NumBytes) {
2010 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2011 }
2012
2013 if (NeedsWinCFI && NumBytes) {
2014 HasWinCFI = true;
2015 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2016 .addImm(NumBytes)
2017 .setMIFlag(MachineInstr::FrameSetup);
2018
2019
2020 int SEHFrameOffset = 0;
2021 unsigned SPOrEstablisher;
2022 if (IsFunclet) {
2023 if (IsClrFunclet) {
2024 // The establisher parameter passed to a CLR funclet is actually a pointer
2025 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2026 // to find the root function establisher frame by loading the PSPSym from
2027 // the intermediate frame.
2028 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2029 MachinePointerInfo NoInfo;
2030 MBB.addLiveIn(Establisher);
2031 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2032 Establisher, false, PSPSlotOffset)
2033 .addMemOperand(MF.getMachineMemOperand(
2034 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2035 ;
2036 // Save the root establisher back into the current funclet's (mostly
2037 // empty) frame, in case a sub-funclet or the GC needs it.
2038 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2039 false, PSPSlotOffset)
2040 .addReg(Establisher)
2041 .addMemOperand(MF.getMachineMemOperand(
2042 NoInfo,
2043 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2044 SlotSize, Align(SlotSize)));
2045 }
2046 SPOrEstablisher = Establisher;
2047 } else {
2048 SPOrEstablisher = StackPtr;
2049 }
2050
2051 if (IsWin64Prologue && HasFP) {
2052 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2053 // this calculation on the incoming establisher, which holds the value of
2054 // RSP from the parent frame at the end of the prologue.
2055 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2056 if (SEHFrameOffset)
2057 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2058 SPOrEstablisher, false, SEHFrameOffset);
2059 else
2060 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2061 .addReg(SPOrEstablisher);
2062
2063 // If this is not a funclet, emit the CFI describing our frame pointer.
2064 if (NeedsWinCFI && !IsFunclet) {
2065 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2066 HasWinCFI = true;
2067 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2068 .addImm(FramePtr)
2069 .addImm(SEHFrameOffset)
2070 .setMIFlag(MachineInstr::FrameSetup);
2071 if (isAsynchronousEHPersonality(Personality))
2072 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2073 }
2074 } else if (IsFunclet && STI.is32Bit()) {
2075 // Reset EBP / ESI to something good for funclets.
2076 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2077 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2078 // into the registration node so that the runtime will restore it for us.
2079 if (!MBB.isCleanupFuncletEntry()) {
2080 assert(Personality == EHPersonality::MSVC_CXX);
2081 Register FrameReg;
2082 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2083 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2084 // ESP is the first field, so no extra displacement is needed.
2085 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2086 false, EHRegOffset)
2087 .addReg(X86::ESP);
2088 }
2089 }
2090
2091 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2092 const MachineInstr &FrameInstr = *MBBI;
2093 ++MBBI;
2094
2095 if (NeedsWinCFI) {
2096 int FI;
2097 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2098 if (X86::FR64RegClass.contains(Reg)) {
2099 int Offset;
2100 Register IgnoredFrameReg;
2101 if (IsWin64Prologue && IsFunclet)
2102 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2103 else
2104 Offset =
2105 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2106 SEHFrameOffset;
2107
2108 HasWinCFI = true;
2109 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2110 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2111 .addImm(Reg)
2112 .addImm(Offset)
2113 .setMIFlag(MachineInstr::FrameSetup);
2114 }
2115 }
2116 }
2117 }
2118
2119 if (NeedsWinCFI && HasWinCFI)
2120 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2121 .setMIFlag(MachineInstr::FrameSetup);
2122
2123 if (FnHasClrFunclet && !IsFunclet) {
2124 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2125 // immediately after the prolog) into the PSPSlot so that funclets
2126 // and the GC can recover it.
2127 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2128 auto PSPInfo = MachinePointerInfo::getFixedStack(
2129 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2130 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2131 PSPSlotOffset)
2132 .addReg(StackPtr)
2133 .addMemOperand(MF.getMachineMemOperand(
2134 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2135 SlotSize, Align(SlotSize)));
2136 }
2137
2138 // Realign stack after we spilled callee-saved registers (so that we'll be
2139 // able to calculate their offsets from the frame pointer).
2140 // Win64 requires aligning the stack after the prologue.
2141 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2142 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2143 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2144 }
2145
2146 // We already dealt with stack realignment and funclets above.
2147 if (IsFunclet && STI.is32Bit())
2148 return;
2149
2150 // If we need a base pointer, set it up here. It's whatever the value
2151 // of the stack pointer is at this point. Any variable size objects
2152 // will be allocated after this, so we can still use the base pointer
2153 // to reference locals.
2154 if (TRI->hasBasePointer(MF)) {
2155 // Update the base pointer with the current stack pointer.
2156 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2157 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2158 .addReg(SPOrEstablisher)
2159 .setMIFlag(MachineInstr::FrameSetup);
2160 if (X86FI->getRestoreBasePointer()) {
2161 // Stash value of base pointer. Saving RSP instead of EBP shortens
2162 // dependence chain. Used by SjLj EH.
2163 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2164 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2165 X86FI->getRestoreBasePointerOffset())
2166 .addReg(SPOrEstablisher)
2167 .setMIFlag(MachineInstr::FrameSetup);
2168 }
2169
2170 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2171 // Stash the value of the frame pointer relative to the base pointer for
2172 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2173 // it recovers the frame pointer from the base pointer rather than the
2174 // other way around.
2175 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2176 Register UsedReg;
2177 int Offset =
2178 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2179 .getFixed();
2180 assert(UsedReg == BasePtr);
2181 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2182 .addReg(FramePtr)
2183 .setMIFlag(MachineInstr::FrameSetup);
2184 }
2185 }
2186 if (ArgBaseReg.isValid()) {
2187 // Save argument base pointer.
2188 auto *MI = X86FI->getStackPtrSaveMI();
2189 int FI = MI->getOperand(1).getIndex();
2190 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2191 // movl %basereg, offset(%ebp)
2192 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2193 .addReg(ArgBaseReg)
2194 .setMIFlag(MachineInstr::FrameSetup);
2195 }
2196
2197 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2198 // Mark end of stack pointer adjustment.
2199 if (!HasFP && NumBytes) {
2200 // Define the current CFA rule to use the provided offset.
2201 assert(StackSize);
2202 BuildCFI(
2203 MBB, MBBI, DL,
2204 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2205 MachineInstr::FrameSetup);
2206 }
2207
2208 // Emit DWARF info specifying the offsets of the callee-saved registers.
2209 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2210 }
2211
2212 // X86 Interrupt handling function cannot assume anything about the direction
2213 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2214 // in each prologue of interrupt handler function.
2215 //
2216 // Create "cld" instruction only in these cases:
2217 // 1. The interrupt handling function uses any of the "rep" instructions.
2218 // 2. Interrupt handling function calls another function.
2219 // 3. If there are any inline asm blocks, as we do not know what they do
2220 //
2221 // TODO: We should also emit cld if we detect the use of std, but as of now,
2222 // the compiler does not even emit that instruction or even define it, so in
2223 // practice, this would only happen with inline asm, which we cover anyway.
2224 if (Fn.getCallingConv() == CallingConv::X86_INTR) {
2225 bool NeedsCLD = false;
2226
2227 for (const MachineBasicBlock &B : MF) {
2228 for (const MachineInstr &MI : B) {
2229 if (MI.isCall()) {
2230 NeedsCLD = true;
2231 break;
2232 }
2233
2234 if (isOpcodeRep(MI.getOpcode())) {
2235 NeedsCLD = true;
2236 break;
2237 }
2238
2239 if (MI.isInlineAsm()) {
2240 // TODO: Parse asm for rep instructions or call sites?
2241 // For now, let's play it safe and emit a cld instruction
2242 // just in case.
2243 NeedsCLD = true;
2244 break;
2245 }
2246 }
2247 }
2248
2249 if (NeedsCLD) {
2250 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2251 .setMIFlag(MachineInstr::FrameSetup);
2252 }
2253 }
2254
2255 // At this point we know if the function has WinCFI or not.
2256 MF.setHasWinCFI(HasWinCFI);
2257}
2258
2259 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2260 const MachineFunction &MF) const {
2261 // We can't use LEA instructions for adjusting the stack pointer if we don't
2262 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2263 // to deallocate the stack.
2264 // This means that we can use LEA for SP in two situations:
2265 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2266 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2267 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2268}
2269
2270 static bool isFuncletReturnInstr(MachineInstr &MI) {
2271 switch (MI.getOpcode()) {
2272 case X86::CATCHRET:
2273 case X86::CLEANUPRET:
2274 return true;
2275 default:
2276 return false;
2277 }
2278 llvm_unreachable("impossible");
2279}
2280
2281// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2282// stack. It holds a pointer to the bottom of the root function frame. The
2283// establisher frame pointer passed to a nested funclet may point to the
2284// (mostly empty) frame of its parent funclet, but it will need to find
2285// the frame of the root function to access locals. To facilitate this,
2286// every funclet copies the pointer to the bottom of the root function
2287// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2288// same offset for the PSPSym in the root function frame that's used in the
2289// funclets' frames allows each funclet to dynamically accept any ancestor
2290// frame as its establisher argument (the runtime doesn't guarantee the
2291// immediate parent for some reason lost to history), and also allows the GC,
2292// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2293// frame with only a single offset reported for the entire method.
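// For illustration: because the PSPSym sits at the same SP-relative offset
// in every frame of the method, a CLR funclet can recover the root
// Initial-SP from whichever ancestor frame it was handed, roughly:
//
//   movq PSPSlotOffset(%rcx), %rcx    ; rcx = incoming establisher
//   movq %rcx, PSPSlotOffset(%rsp)    ; republish in this funclet's frame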
2294unsigned
2295X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2296 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2297 Register SPReg;
2298 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2299 /*IgnoreSPUpdates*/ true)
2300 .getFixed();
2301 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2302 return static_cast<unsigned>(Offset);
2303}
2304
2305unsigned
2306 X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2307 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2308 // This is the size of the pushed CSRs.
2309 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2310 // This is the size of callee saved XMMs.
2311 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2312 unsigned XMMSize =
2313 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2314 // This is the amount of stack a funclet needs to allocate.
2315 unsigned UsedSize;
2316 EHPersonality Personality =
2317 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2318 if (Personality == EHPersonality::CoreCLR) {
2319 // CLR funclets need to hold enough space to include the PSPSym, at the
2320 // same offset from the stack pointer (immediately after the prolog) as it
2321 // resides at in the main function.
2322 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2323 } else {
2324 // Other funclets just need enough stack for outgoing call arguments.
2325 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2326 }
2327 // RBP is not included in the callee saved register block. After pushing RBP,
2328 // everything is 16 byte aligned. Everything we allocate before an outgoing
2329 // call must also be 16 byte aligned.
2330 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2331 // Subtract out the size of the callee saved registers. This is how much stack
2332 // each funclet will allocate.
2333 return FrameSizeMinusRBP + XMMSize - CSSize;
2334}
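// For illustration: with 16 bytes of pushed CSRs, 40 bytes of outgoing
// call arguments and a 16-byte stack alignment, alignTo(16 + 40, 16) == 64,
// so a funclet with no XMM spills allocates 64 - 16 == 48 bytes.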
2335
2336static bool isTailCallOpcode(unsigned Opc) {
2337 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2338 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2339 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2340}
2341
2342 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2343 MachineBasicBlock &MBB) const {
2344 const MachineFrameInfo &MFI = MF.getFrameInfo();
2345 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2346 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2347 MachineBasicBlock::iterator MBBI = Terminator;
2348 DebugLoc DL;
2349 if (MBBI != MBB.end())
2350 DL = MBBI->getDebugLoc();
2351 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2352 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2353 Register FramePtr = TRI->getFrameRegister(MF);
2354 Register MachineFramePtr =
2355 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2356
2357 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2358 bool NeedsWin64CFI =
2359 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2360 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2361
2362 // Get the number of bytes to allocate from the FrameInfo.
2363 uint64_t StackSize = MFI.getStackSize();
2364 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2365 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2366 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2367 bool HasFP = hasFP(MF);
2368 uint64_t NumBytes = 0;
2369
2370 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2371 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2372 MF.needsFrameMoves();
2373
2374 Register ArgBaseReg;
2375 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2376 unsigned Opc = X86::LEA32r;
2377 Register StackReg = X86::ESP;
2378 ArgBaseReg = MI->getOperand(0).getReg();
2379 if (STI.is64Bit()) {
2380 Opc = X86::LEA64r;
2381 StackReg = X86::RSP;
2382 }
2383 // leal -4(%basereg), %esp
2384 // .cfi_def_cfa %esp, 4
2385 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2386 .addUse(ArgBaseReg)
2387 .addImm(1)
2388 .addUse(X86::NoRegister)
2389 .addImm(-(int64_t)SlotSize)
2390 .addUse(X86::NoRegister)
2391 .setMIFlag(MachineInstr::FrameDestroy);
2392 if (NeedsDwarfCFI) {
2393 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2394 BuildCFI(MBB, MBBI, DL,
2395 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2396 MachineInstr::FrameDestroy);
2397 --MBBI;
2398 }
2399 --MBBI;
2400 }
2401
2402 if (IsFunclet) {
2403 assert(HasFP && "EH funclets without FP not yet implemented");
2404 NumBytes = getWinEHFuncletFrameSize(MF);
2405 } else if (HasFP) {
2406 // Calculate required stack adjustment.
2407 uint64_t FrameSize = StackSize - SlotSize;
2408 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2409
2410 // Callee-saved registers were pushed on stack before the stack was
2411 // realigned.
2412 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2413 NumBytes = alignTo(FrameSize, MaxAlign);
2414 } else {
2415 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2416 }
2417 uint64_t SEHStackAllocAmt = NumBytes;
2418
2419 // AfterPop is the position to insert .cfi_restore.
2420 MachineBasicBlock::iterator AfterPop = MBBI;
2421 if (HasFP) {
2422 if (X86FI->hasSwiftAsyncContext()) {
2423 // Discard the context.
2424 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2425 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2426 }
2427 // Pop EBP.
2428 BuildMI(MBB, MBBI, DL,
2429 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2430 MachineFramePtr)
2431 .setMIFlag(MachineInstr::FrameDestroy);
2432
2433 // We need to reset FP to its untagged state on return. Bit 60 is currently
2434 // used to show the presence of an extended frame.
2435 if (X86FI->hasSwiftAsyncContext()) {
2436 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2437 .addUse(MachineFramePtr)
2438 .addImm(60)
2439 .setMIFlag(MachineInstr::FrameDestroy);
2440 }
2441
2442 if (NeedsDwarfCFI) {
2443 if (!ArgBaseReg.isValid()) {
2444 unsigned DwarfStackPtr =
2445 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2446 BuildCFI(MBB, MBBI, DL,
2447 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2448 MachineInstr::FrameDestroy);
2449 }
2450 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2451 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2452 BuildCFI(MBB, AfterPop, DL,
2453 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2454 MachineInstr::FrameDestroy);
2455 --MBBI;
2456 --AfterPop;
2457 }
2458 --MBBI;
2459 }
2460 }
2461
2462 MachineBasicBlock::iterator FirstCSPop = MBBI;
2463 // Skip the callee-saved pop instructions.
2464 while (MBBI != MBB.begin()) {
2465 MachineBasicBlock::iterator PI = std::prev(MBBI);
2466 unsigned Opc = PI->getOpcode();
2467
2468 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2469 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2470 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2471 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2472 Opc != X86::POP2P && Opc != X86::LEA64r))
2473 break;
2474 FirstCSPop = PI;
2475 }
2476
2477 --MBBI;
2478 }
2479 if (ArgBaseReg.isValid()) {
2480 // Restore argument base pointer.
2481 auto *MI = X86FI->getStackPtrSaveMI();
2482 int FI = MI->getOperand(1).getIndex();
2483 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2484 // movl offset(%ebp), %basereg
2485 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2486 .setMIFlag(MachineInstr::FrameDestroy);
2487 }
2488 MBBI = FirstCSPop;
2489
2490 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2491 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2492
2493 if (MBBI != MBB.end())
2494 DL = MBBI->getDebugLoc();
2495 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2496 // instruction, merge the two instructions.
2497 if (NumBytes || MFI.hasVarSizedObjects())
2498 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2499
2500 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2501 // slot before popping them off! The same applies when the stack was realigned.
2502 // Don't do this if this was a funclet epilogue, since the funclets will not do
2503 // realignment or dynamic stack allocation.
2504 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2505 !IsFunclet) {
2506 if (TRI->hasStackRealignment(MF))
2507 MBBI = FirstCSPop;
2508 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2509 uint64_t LEAAmount =
2510 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2511
2512 if (X86FI->hasSwiftAsyncContext())
2513 LEAAmount -= 16;
2514
2515 // There are only two legal forms of epilogue:
2516 // - add SEHAllocationSize, %rsp
2517 // - lea SEHAllocationSize(%FramePtr), %rsp
2518 //
2519 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2520 // However, we may use this sequence if we have a frame pointer because the
2521 // effects of the prologue can safely be undone.
2522 if (LEAAmount != 0) {
2523 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2524 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2525 false, LEAAmount);
2526 --MBBI;
2527 } else {
2528 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2529 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2530 --MBBI;
2531 }
2532 } else if (NumBytes) {
2533 // Adjust stack pointer back: ESP += numbytes.
2534 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2535 if (!HasFP && NeedsDwarfCFI) {
2536 // Define the current CFA rule to use the provided offset.
2537 BuildCFI(MBB, MBBI, DL,
2538 MCCFIInstruction::cfiDefCfaOffset(
2539 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2540 MachineInstr::FrameDestroy);
2541 }
2542 --MBBI;
2543 }
2544
2545 // Windows unwinder will not invoke function's exception handler if IP is
2546 // either in prologue or in epilogue. This behavior causes a problem when a
2547 // call immediately precedes an epilogue, because the return address points
2548 // into the epilogue. To cope with that, we insert an epilogue marker here,
2549 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2550 // final emitted code.
2551 if (NeedsWin64CFI && MF.hasWinCFI())
2552 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2553
2554 if (!HasFP && NeedsDwarfCFI) {
2555 MBBI = FirstCSPop;
2556 int64_t Offset = -(int64_t)CSSize - SlotSize;
2557 // Mark callee-saved pop instruction.
2558 // Define the current CFA rule to use the provided offset.
2559 while (MBBI != MBB.end()) {
2560 MachineBasicBlock::iterator PI = MBBI;
2561 unsigned Opc = PI->getOpcode();
2562 ++MBBI;
2563 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2564 Opc == X86::POP2 || Opc == X86::POP2P) {
2565 Offset += SlotSize;
2566 // Compared to pop, pop2 introduces more stack offset (one more
2567 // register).
2568 if (Opc == X86::POP2 || Opc == X86::POP2P)
2569 Offset += SlotSize;
2570 BuildCFI(MBB, MBBI, DL,
2571 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2572 MachineInstr::FrameDestroy);
2573 }
2574 }
2575 }
2576
2577 // Emit DWARF info specifying the restores of the callee-saved registers.
2578 // For epilogue with return inside or being other block without successor,
2579 // no need to generate .cfi_restore for callee-saved registers.
2580 if (NeedsDwarfCFI && !MBB.succ_empty())
2581 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2582
2583 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2584 // Add the return addr area delta back since we are not tail calling.
2585 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2586 assert(Offset >= 0 && "TCDelta should never be positive");
2587 if (Offset) {
2588 // Check for possible merge with preceding ADD instruction.
2589 Offset += mergeSPUpdates(MBB, Terminator, true);
2590 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2591 }
2592 }
2593
2594 // Emit tilerelease for AMX kernel.
2595 if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
2596 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2597}
2598
2599 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2600 int FI,
2601 Register &FrameReg) const {
2602 const MachineFrameInfo &MFI = MF.getFrameInfo();
2603
2604 bool IsFixed = MFI.isFixedObjectIndex(FI);
2605 // We can't calculate offset from frame pointer if the stack is realigned,
2606 // so enforce usage of stack/base pointer. The base pointer is used when we
2607 // have dynamic allocas in addition to dynamic realignment.
2608 if (TRI->hasBasePointer(MF))
2609 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2610 else if (TRI->hasStackRealignment(MF))
2611 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2612 else
2613 FrameReg = TRI->getFrameRegister(MF);
2614
2615 // Offset will hold the offset from the stack pointer at function entry to the
2616 // object.
2617 // We need to factor in additional offsets applied during the prologue to the
2618 // frame, base, and stack pointer depending on which is used.
2619 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2620 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2621 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2622 uint64_t StackSize = MFI.getStackSize();
2623 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2624 int64_t FPDelta = 0;
2625
2626 // In an x86 interrupt, remove the offset we added to account for the return
2627 // address from any stack object allocated in the caller's frame. Interrupts
2628 // do not have a standard return address. Fixed objects in the current frame,
2629 // such as SSE register spills, should not get this treatment.
2630 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2631 Offset >= 0) {
2632 Offset += getOffsetOfLocalArea();
2633 }
2634
2635 if (IsWin64Prologue) {
2636 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2637
2638 // Calculate required stack adjustment.
2639 uint64_t FrameSize = StackSize - SlotSize;
2640 // If required, include space for extra hidden slot for stashing base
2641 // pointer.
2642 if (X86FI->getRestoreBasePointer())
2643 FrameSize += SlotSize;
2644 uint64_t NumBytes = FrameSize - CSSize;
2645
2646 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2647 if (FI && FI == X86FI->getFAIndex())
2648 return StackOffset::getFixed(-SEHFrameOffset);
2649
2650 // FPDelta is the offset from the "traditional" FP location of the old base
2651 // pointer followed by return address and the location required by the
2652 // restricted Win64 prologue.
2653 // Add FPDelta to all offsets below that go through the frame pointer.
2654 FPDelta = FrameSize - SEHFrameOffset;
2655 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2656 "FPDelta isn't aligned per the Win64 ABI!");
2657 }
2658
2659 if (FrameReg == TRI->getFramePtr()) {
2660 // Skip saved EBP/RBP
2661 Offset += SlotSize;
2662
2663 // Account for restricted Windows prologue.
2664 Offset += FPDelta;
2665
2666 // Skip the RETADDR move area
2667 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2668 if (TailCallReturnAddrDelta < 0)
2669 Offset -= TailCallReturnAddrDelta;
2670
2671 return StackOffset::getFixed(Offset);
2672 }
2673
2674 // FrameReg is either the stack pointer or a base pointer. But the base is
2675 // located at the end of the statically known StackSize so the distinction
2676 // doesn't really matter.
2677 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2678 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2679 return StackOffset::getFixed(Offset + StackSize);
2680}
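// For illustration: on x86-64 (SlotSize == 8, local area offset -8) an
// object with getObjectOffset() == -24 that is addressed through rbp
// resolves to -24 - (-8) + 8 == -8, i.e. -8(%rbp); FPDelta only becomes
// non-zero under the restricted Win64 prologue.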
2681
2682 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2683 Register &FrameReg) const {
2684 const MachineFrameInfo &MFI = MF.getFrameInfo();
2685 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2686 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2687 const auto it = WinEHXMMSlotInfo.find(FI);
2688
2689 if (it == WinEHXMMSlotInfo.end())
2690 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2691
2692 FrameReg = TRI->getStackRegister();
2693 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2694 it->second;
2695}
2696
2697 StackOffset
2698 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2699 Register &FrameReg,
2700 int Adjustment) const {
2701 const MachineFrameInfo &MFI = MF.getFrameInfo();
2702 FrameReg = TRI->getStackRegister();
2703 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2704 getOffsetOfLocalArea() + Adjustment);
2705}
2706
2707 StackOffset
2708 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2709 int FI, Register &FrameReg,
2710 bool IgnoreSPUpdates) const {
2711
2712 const MachineFrameInfo &MFI = MF.getFrameInfo();
2713 // Does not include any dynamic realign.
2714 const uint64_t StackSize = MFI.getStackSize();
2715 // LLVM arranges the stack as follows:
2716 // ...
2717 // ARG2
2718 // ARG1
2719 // RETADDR
2720 // PUSH RBP <-- RBP points here
2721 // PUSH CSRs
2722 // ~~~~~~~ <-- possible stack realignment (non-win64)
2723 // ...
2724 // STACK OBJECTS
2725 // ... <-- RSP after prologue points here
2726 // ~~~~~~~ <-- possible stack realignment (win64)
2727 //
2728 // if (hasVarSizedObjects()):
2729 // ... <-- "base pointer" (ESI/RBX) points here
2730 // DYNAMIC ALLOCAS
2731 // ... <-- RSP points here
2732 //
2733 // Case 1: In the simple case of no stack realignment and no dynamic
2734 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2735 // with fixed offsets from RSP.
2736 //
2737 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2738 // stack objects are addressed with RBP and regular stack objects with RSP.
2739 //
2740 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2741 // to address stack arguments for outgoing calls and nothing else. The "base
2742 // pointer" points to local variables, and RBP points to fixed objects.
2743 //
2744 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2745 // answer we give is relative to the SP after the prologue, and not the
2746 // SP in the middle of the function.
2747
2748 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2749 !STI.isTargetWin64())
2750 return getFrameIndexReference(MF, FI, FrameReg);
2751
2752 // If !hasReservedCallFrame the function might have SP adjustment in the
2753 // body. So, even though the offset is statically known, it depends on where
2754 // we are in the function.
2755 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2756 return getFrameIndexReference(MF, FI, FrameReg);
2757
2758 // We don't handle tail calls, and shouldn't be seeing them either.
2759 assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2760 "we don't handle this case!");
2761
2762 // This is how the math works out:
2763 //
2764 // %rsp grows (i.e. gets lower) left to right. Each box below is
2765 // one word (eight bytes). Obj0 is the stack slot we're trying to
2766 // get to.
2767 //
2768 // ----------------------------------
2769 // | BP | Obj0 | Obj1 | ... | ObjN |
2770 // ----------------------------------
2771 // ^ ^ ^ ^
2772 // A B C E
2773 //
2774 // A is the incoming stack pointer.
2775 // (B - A) is the local area offset (-8 for x86-64) [1]
2776 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2777 //
2778 // |(E - B)| is the StackSize (absolute value, positive). For a
2779 // stack that grows down, this works out to be (B - E). [3]
2780 //
2781 // E is also the value of %rsp after stack has been set up, and we
2782 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2783 // (C - E) == (C - A) - (B - A) + (B - E)
2784 // { Using [1], [2] and [3] above }
2785 // == getObjectOffset - LocalAreaOffset + StackSize
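// For illustration: with StackSize == 40, a local area offset of -8 and
// getObjectOffset(Obj0) == -16, the returned offset is
// -16 - (-8) + 40 == 32, so Obj0 is addressed as 32(%rsp) after the
// prologue.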
2786
2787 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2788}
2789
2790 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2791 MachineFunction &MF, const TargetRegisterInfo *TRI,
2792 std::vector<CalleeSavedInfo> &CSI) const {
2793 MachineFrameInfo &MFI = MF.getFrameInfo();
2794 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2795
2796 unsigned CalleeSavedFrameSize = 0;
2797 unsigned XMMCalleeSavedFrameSize = 0;
2798 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2799 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2800
2801 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2802
2803 if (TailCallReturnAddrDelta < 0) {
2804 // create RETURNADDR area
2805 // arg
2806 // arg
2807 // RETADDR
2808 // { ...
2809 // RETADDR area
2810 // ...
2811 // }
2812 // [EBP]
2813 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2814 TailCallReturnAddrDelta - SlotSize, true);
2815 }
2816
2817 // Spill the BasePtr if it's used.
2818 if (this->TRI->hasBasePointer(MF)) {
2819 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2820 if (MF.hasEHFunclets()) {
2821 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2822 X86FI->setHasSEHFramePtrSave(true);
2823 X86FI->setSEHFramePtrSaveIndex(FI);
2824 }
2825 }
2826
2827 if (hasFP(MF)) {
2828 // emitPrologue always spills frame register the first thing.
2829 SpillSlotOffset -= SlotSize;
2830 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2831
2832 // The async context lives directly before the frame pointer, and we
2833 // allocate a second slot to preserve stack alignment.
2834 if (X86FI->hasSwiftAsyncContext()) {
2835 SpillSlotOffset -= SlotSize;
2836 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2837 SpillSlotOffset -= SlotSize;
2838 }
2839
2840 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2841 // the frame register, we can delete it from CSI list and not have to worry
2842 // about avoiding it later.
2843 Register FPReg = TRI->getFrameRegister(MF);
2844 for (unsigned i = 0; i < CSI.size(); ++i) {
2845 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2846 CSI.erase(CSI.begin() + i);
2847 break;
2848 }
2849 }
2850 }
2851
2852 // Strategy:
2853 // 1. Use push2 when
2854 // a) number of CSR > 1 if no need padding
2855 // b) number of CSR > 2 if need padding
2856 // 2. When the number of CSR push is odd
2857 // a. Start to use push2 from the 1st push if stack is 16B aligned.
2858 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2859 // 3. When the number of CSR push is even, start to use push2 from the 1st
2860 // push and make the stack 16B aligned before the push
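// For illustration: four GPR CSRs with a 16B-aligned spill offset need no
// padding, so UsePush2Pop2 is true and all four saves pair into two push2
// instructions; with three CSRs and a misaligned offset, the first
// register is pushed alone and the remaining two use push2 (rule 2b).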
2861 unsigned NumRegsForPush2 = 0;
2862 if (STI.hasPush2Pop2()) {
2863 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2864 return X86::GR64RegClass.contains(I.getReg());
2865 });
2866 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2867 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2868 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2869 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2870 if (X86FI->padForPush2Pop2()) {
2871 SpillSlotOffset -= SlotSize;
2872 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2873 }
2874 }
2875
2876 // Assign slots for GPRs. It increases frame size.
2877 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2878 Register Reg = I.getReg();
2879
2880 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2881 continue;
2882
2883 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2884 // or when the number of candidates collected so far is odd.
2885 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2886 (SpillSlotOffset % 16 == 0 ||
2887 X86FI->getNumCandidatesForPush2Pop2() % 2))
2888 X86FI->addCandidateForPush2Pop2(Reg);
2889
2890 SpillSlotOffset -= SlotSize;
2891 CalleeSavedFrameSize += SlotSize;
2892
2893 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2894 I.setFrameIdx(SlotIndex);
2895 }
2896
2897 // Adjust the offset of spill slot as we know the accurate callee saved frame
2898 // size.
2899 if (X86FI->getRestoreBasePointer()) {
2900 SpillSlotOffset -= SlotSize;
2901 CalleeSavedFrameSize += SlotSize;
2902
2903 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2904 // TODO: saving the slot index is better?
2905 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2906 }
2907 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2908 "Expect even candidates for push2/pop2");
2909 if (X86FI->getNumCandidatesForPush2Pop2())
2910 ++NumFunctionUsingPush2Pop2;
2911 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2912 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2913
2914 // Assign slots for XMMs.
2915 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2916 Register Reg = I.getReg();
2917 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2918 continue;
2919
2920 // If this is k-register make sure we lookup via the largest legal type.
2921 MVT VT = MVT::Other;
2922 if (X86::VK16RegClass.contains(Reg))
2923 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2924
2925 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2926 unsigned Size = TRI->getSpillSize(*RC);
2927 Align Alignment = TRI->getSpillAlign(*RC);
2928 // ensure alignment
2929 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2930 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2931
2932 // spill into slot
2933 SpillSlotOffset -= Size;
2934 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2935 I.setFrameIdx(SlotIndex);
2936 MFI.ensureMaxAlignment(Alignment);
2937
2938 // Save the start offset and size of XMM in stack frame for funclets.
2939 if (X86::VR128RegClass.contains(Reg)) {
2940 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2941 XMMCalleeSavedFrameSize += Size;
2942 }
2943 }
2944
2945 return true;
2946}
2947
2948 bool X86FrameLowering::spillCalleeSavedRegisters(
2949 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2950 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2951 DebugLoc DL = MBB.findDebugLoc(MI);
2952
2953 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2954 // for us, and there are no XMM CSRs on Win32.
2955 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2956 return true;
2957
2958 // Push GPRs. It increases frame size.
2959 const MachineFunction &MF = *MBB.getParent();
2960 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2961 if (X86FI->padForPush2Pop2())
2962 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
2963
2964 // Update LiveIn of the basic block and decide whether we can add a kill flag
2965 // to the use.
2966 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
2967 const MachineRegisterInfo &MRI = MF.getRegInfo();
2968 // Do not set a kill flag on values that are also marked as live-in. This
2969 // happens with the llvm.returnaddress intrinsic and with arguments
2970 // passed in callee saved registers.
2971 // Omitting the kill flags is conservatively correct even if the live-in
2972 // is not used after all.
2973 if (MRI.isLiveIn(Reg))
2974 return false;
2975 MBB.addLiveIn(Reg);
2976 // Check if any subregister is live-in
2977 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
2978 if (MRI.isLiveIn(*AReg))
2979 return false;
2980 return true;
2981 };
2982 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
2983 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
2984 };
2985
2986 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
2987 Register Reg = RI->getReg();
2988 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2989 continue;
2990
2991 if (X86FI->isCandidateForPush2Pop2(Reg)) {
2992 Register Reg2 = (++RI)->getReg();
2993 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
2994 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
2995 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
2996 .setMIFlag(MachineInstr::FrameSetup);
2997 } else {
2998 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
2999 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3000 .setMIFlag(MachineInstr::FrameSetup);
3001 }
3002 }
3003
3004 if (X86FI->getRestoreBasePointer()) {
3005 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3006 Register BaseReg = this->TRI->getBaseRegister();
3007 BuildMI(MBB, MI, DL, TII.get(Opc))
3008 .addReg(BaseReg, getKillRegState(true))
3009 .setMIFlag(MachineInstr::FrameSetup);
3010 }
3011
3012 // Spill XMM regs. X86 has no push/pop instructions for XMM registers,
3013 // so spill them to slots in the stack frame instead.
3014 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3015 Register Reg = I.getReg();
3016 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3017 continue;
3018
3019 // If this is k-register make sure we lookup via the largest legal type.
3020 MVT VT = MVT::Other;
3021 if (X86::VK16RegClass.contains(Reg))
3022 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3023
3024 // Add the callee-saved register as live-in. It's killed at the spill.
3025 MBB.addLiveIn(Reg);
3026 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3027
3028 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3029 Register());
3030 --MI;
3031 MI->setFlag(MachineInstr::FrameSetup);
3032 ++MI;
3033 }
3034
3035 return true;
3036}
3037
3038 void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3039 MachineBasicBlock::iterator MBBI,
3040 MachineInstr *CatchRet) const {
3041 // SEH shouldn't use catchret.
3042 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3043 MBB.getParent()->getFunction().getPersonalityFn())) &&
3044 "SEH should not use CATCHRET");
3045 const DebugLoc &DL = CatchRet->getDebugLoc();
3046 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3047
3048 // Fill EAX/RAX with the address of the target block.
3049 if (STI.is64Bit()) {
3050 // LEA64r CatchRetTarget(%rip), %rax
3051 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3052 .addReg(X86::RIP)
3053 .addImm(0)
3054 .addReg(0)
3055 .addMBB(CatchRetTarget)
3056 .addReg(0);
3057 } else {
3058 // MOV32ri $CatchRetTarget, %eax
3059 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3060 .addMBB(CatchRetTarget);
3061 }
3062
3063 // Record that we've taken the address of CatchRetTarget and no longer just
3064 // reference it in a terminator.
3065 CatchRetTarget->setMachineBlockAddressTaken();
3066}
3067
3068 bool X86FrameLowering::restoreCalleeSavedRegisters(
3069 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3070 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3071 if (CSI.empty())
3072 return false;
3073
3074 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3075 // Don't restore CSRs in 32-bit EH funclets. Matches
3076 // spillCalleeSavedRegisters.
3077 if (STI.is32Bit())
3078 return true;
3079 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3080 // funclets. emitEpilogue transforms these to normal jumps.
3081 if (MI->getOpcode() == X86::CATCHRET) {
3082 const Function &F = MBB.getParent()->getFunction();
3083 bool IsSEH = isAsynchronousEHPersonality(
3084 classifyEHPersonality(F.getPersonalityFn()));
3085 if (IsSEH)
3086 return true;
3087 }
3088 }
3089
3090 DebugLoc DL = MBB.findDebugLoc(MI);
3091
3092 // Reload XMMs from stack frame.
3093 for (const CalleeSavedInfo &I : CSI) {
3094 Register Reg = I.getReg();
3095 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3096 continue;
3097
3098 // If this is k-register make sure we lookup via the largest legal type.
3099 MVT VT = MVT::Other;
3100 if (X86::VK16RegClass.contains(Reg))
3101 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3102
3103 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3104 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3105 Register());
3106 }
3107
3108 // Clear the stack slot for spill base pointer register.
3109 MachineFunction &MF = *MBB.getParent();
3110 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3111 if (X86FI->getRestoreBasePointer()) {
3112 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3113 Register BaseReg = this->TRI->getBaseRegister();
3114 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3115 .setMIFlag(MachineInstr::FrameDestroy);
3116 }
3117
3118 // POP GPRs.
3119 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3120 Register Reg = I->getReg();
3121 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3122 continue;
3123
3124 if (X86FI->isCandidateForPush2Pop2(Reg))
3125 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3126 .addReg((++I)->getReg(), RegState::Define)
3127 .setMIFlag(MachineInstr::FrameDestroy);
3128 else
3129 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3130 .setMIFlag(MachineInstr::FrameDestroy);
3131 }
3132 if (X86FI->padForPush2Pop2())
3133 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3134
3135 return true;
3136}
3137
3138 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3139 BitVector &SavedRegs,
3140 RegScavenger *RS) const {
3141 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3142
3143 // Spill the BasePtr if it's used.
3144 if (TRI->hasBasePointer(MF)) {
3145 Register BasePtr = TRI->getBaseRegister();
3146 if (STI.isTarget64BitILP32())
3147 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3148 SavedRegs.set(BasePtr);
3149 }
3150}
3151
3152static bool HasNestArgument(const MachineFunction *MF) {
3153 const Function &F = MF->getFunction();
3154 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3155 I++) {
3156 if (I->hasNestAttr() && !I->use_empty())
3157 return true;
3158 }
3159 return false;
3160}
3161
3162/// GetScratchRegister - Get a temp register for performing work in the
3163/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3164/// and the properties of the function either one or two registers will be
3165/// needed. Set primary to true for the first register, false for the second.
3166static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3167 const MachineFunction &MF, bool Primary) {
3168 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3169
3170 // Erlang stuff.
3171 if (CallingConvention == CallingConv::HiPE) {
3172 if (Is64Bit)
3173 return Primary ? X86::R14 : X86::R13;
3174 else
3175 return Primary ? X86::EBX : X86::EDI;
3176 }
3177
3178 if (Is64Bit) {
3179 if (IsLP64)
3180 return Primary ? X86::R11 : X86::R12;
3181 else
3182 return Primary ? X86::R11D : X86::R12D;
3183 }
3184
3185 bool IsNested = HasNestArgument(&MF);
3186
3187 if (CallingConvention == CallingConv::X86_FastCall ||
3188 CallingConvention == CallingConv::Fast ||
3189 CallingConvention == CallingConv::Tail) {
3190 if (IsNested)
3191 report_fatal_error("Segmented stacks does not support fastcall with "
3192 "nested function.");
3193 return Primary ? X86::EAX : X86::ECX;
3194 }
3195 if (IsNested)
3196 return Primary ? X86::EDX : X86::EAX;
3197 return Primary ? X86::ECX : X86::EAX;
3198}
3199
3200// The stack limit in the TCB is set to this many bytes above the actual stack
3201 // limit.
3202 static const uint64_t kSplitStackAvailable = 256;
3203
3204 void X86FrameLowering::adjustForSegmentedStacks(
3205 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3206 MachineFrameInfo &MFI = MF.getFrameInfo();
3207 uint64_t StackSize;
3208 unsigned TlsReg, TlsOffset;
3209 DebugLoc DL;
3210
3211 // To support shrink-wrapping we would need to insert the new blocks
3212 // at the right place and update the branches to PrologueMBB.
3213 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3214
3215 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3216 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3217 "Scratch register is live-in");
3218
3219 if (MF.getFunction().isVarArg())
3220 report_fatal_error("Segmented stacks do not support vararg functions.");
3221 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3222 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3223 !STI.isTargetDragonFly())
3224 report_fatal_error("Segmented stacks not supported on this platform.");
3225
3226 // Eventually StackSize will be calculated by a link-time pass, which will
3227 // also decide whether checking code needs to be injected into this particular
3228 // prologue.
3229 StackSize = MFI.getStackSize();
3230
3231 if (!MFI.needsSplitStackProlog())
3232 return;
3233
3234 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3235 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3236 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3237 bool IsNested = false;
3238
3239 // We need to know if the function has a nest argument only in 64 bit mode.
3240 if (Is64Bit)
3241 IsNested = HasNestArgument(&MF);
3242
3243 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3244 // allocMBB needs to be the last (terminating) instruction.
3245
3246 for (const auto &LI : PrologueMBB.liveins()) {
3247 allocMBB->addLiveIn(LI);
3248 checkMBB->addLiveIn(LI);
3249 }
3250
3251 if (IsNested)
3252 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3253
3254 MF.push_front(allocMBB);
3255 MF.push_front(checkMBB);
3256
3257 // When the frame size is less than 256 we just compare the stack
3258 // boundary directly to the value of the stack pointer, per gcc.
3259 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3260
3261 // Read the limit off the current stacklet off the stack_guard location.
3262 if (Is64Bit) {
3263 if (STI.isTargetLinux()) {
3264 TlsReg = X86::FS;
3265 TlsOffset = IsLP64 ? 0x70 : 0x40;
3266 } else if (STI.isTargetDarwin()) {
3267 TlsReg = X86::GS;
3268 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3269 } else if (STI.isTargetWin64()) {
3270 TlsReg = X86::GS;
3271 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3272 } else if (STI.isTargetFreeBSD()) {
3273 TlsReg = X86::FS;
3274 TlsOffset = 0x18;
3275 } else if (STI.isTargetDragonFly()) {
3276 TlsReg = X86::FS;
3277 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3278 } else {
3279 report_fatal_error("Segmented stacks not supported on this platform.");
3280 }
3281
3282 if (CompareStackPointer)
3283 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3284 else
3285 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3286 ScratchReg)
3287 .addReg(X86::RSP)
3288 .addImm(1)
3289 .addReg(0)
3290 .addImm(-StackSize)
3291 .addReg(0);
3292
3293 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3294 .addReg(ScratchReg)
3295 .addReg(0)
3296 .addImm(1)
3297 .addReg(0)
3298 .addImm(TlsOffset)
3299 .addReg(TlsReg);
3300 } else {
3301 if (STI.isTargetLinux()) {
3302 TlsReg = X86::GS;
3303 TlsOffset = 0x30;
3304 } else if (STI.isTargetDarwin()) {
3305 TlsReg = X86::GS;
3306 TlsOffset = 0x48 + 90 * 4;
3307 } else if (STI.isTargetWin32()) {
3308 TlsReg = X86::FS;
3309 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3310 } else if (STI.isTargetDragonFly()) {
3311 TlsReg = X86::FS;
3312 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3313 } else if (STI.isTargetFreeBSD()) {
3314 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3315 } else {
3316 report_fatal_error("Segmented stacks not supported on this platform.");
3317 }
3318
3319 if (CompareStackPointer)
3320 ScratchReg = X86::ESP;
3321 else
3322 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3323 .addReg(X86::ESP)
3324 .addImm(1)
3325 .addReg(0)
3326 .addImm(-StackSize)
3327 .addReg(0);
3328
3329 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3330 STI.isTargetDragonFly()) {
3331 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3332 .addReg(ScratchReg)
3333 .addReg(0)
3334 .addImm(0)
3335 .addReg(0)
3336 .addImm(TlsOffset)
3337 .addReg(TlsReg);
3338 } else if (STI.isTargetDarwin()) {
3339
3340 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3341 unsigned ScratchReg2;
3342 bool SaveScratch2;
3343 if (CompareStackPointer) {
3344 // The primary scratch register is available for holding the TLS offset.
3345 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3346 SaveScratch2 = false;
3347 } else {
3348 // Need to use a second register to hold the TLS offset
3349 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3350
3351 // Unfortunately, with fastcc the second scratch register may hold an
3352 // argument.
3353 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3354 }
3355
3356 // If Scratch2 is live-in then it needs to be saved.
3357 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3358 "Scratch register is live-in and not saved");
3359
3360 if (SaveScratch2)
3361 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3362 .addReg(ScratchReg2, RegState::Kill);
3363
3364 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3365 .addImm(TlsOffset);
3366 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3367 .addReg(ScratchReg)
3368 .addReg(ScratchReg2)
3369 .addImm(1)
3370 .addReg(0)
3371 .addImm(0)
3372 .addReg(TlsReg);
3373
3374 if (SaveScratch2)
3375 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3376 }
3377 }
3378
3379 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3380 // It jumps to normal execution of the function body.
3381 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3382 .addMBB(&PrologueMBB)
3383 .addImm(X86::COND_AE);
3384
3385 // On 32 bit we first push the arguments size and then the frame size. On 64
3386 // bit, we pass the stack frame size in r10 and the argument size in r11.
3387 if (Is64Bit) {
3388 // Functions with nested arguments use R10, so it needs to be saved across
3389 // the call to _morestack
3390
3391 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3392 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3393 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3394 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3395
3396 if (IsNested)
3397 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3398
3399 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3400 .addImm(StackSize);
3401 BuildMI(allocMBB, DL,
3402 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3403 Reg11)
3404 .addImm(X86FI->getArgumentStackSize());
3405 } else {
3406 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3407 .addImm(X86FI->getArgumentStackSize());
3408 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3409 }
3410
3411 // __morestack is in libgcc
3412 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3413 // Under the large code model, we cannot assume that __morestack lives
3414 // within 2^31 bytes of the call site, so we cannot use pc-relative
3415 // addressing. We cannot perform the call via a temporary register,
3416 // as the rax register may be used to store the static chain, and all
3417 // other suitable registers may be either callee-save or used for
3418 // parameter passing. We cannot use the stack at this point either
3419 // because __morestack manipulates the stack directly.
3420 //
3421 // To avoid these issues, perform an indirect call via a read-only memory
3422 // location containing the address.
3423 //
3424 // This solution is not perfect, as it assumes that the .rodata section
3425 // is laid out within 2^31 bytes of each function body, but this seems
3426 // to be sufficient for JIT.
3427 // FIXME: Add retpoline support and remove the error here.
3428 if (STI.useIndirectThunkCalls())
3429 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3430 "code model and thunks not yet implemented.");
3431 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3432 .addReg(X86::RIP)
3433 .addImm(0)
3434 .addReg(0)
3435 .addExternalSymbol("__morestack_addr")
3436 .addReg(0);
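// A hedged sketch of what the CALL64m built above becomes; the
// __morestack_addr slot is assumed to be emitted elsewhere as a
// read-only, pointer-sized constant holding the address of __morestack:
//
//   callq *__morestack_addr(%rip)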
3437 } else {
3438 if (Is64Bit)
3439 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3440 .addExternalSymbol("__morestack");
3441 else
3442 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3443 .addExternalSymbol("__morestack");
3444 }
3445
3446 if (IsNested)
3447 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3448 else
3449 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3450
3451 allocMBB->addSuccessor(&PrologueMBB);
3452
3453 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3454 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3455
3456#ifdef EXPENSIVE_CHECKS
3457 MF.verify();
3458#endif
3459}
3460
3461/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3462/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3463/// to fields it needs, through a named metadata node "hipe.literals" containing
3464/// name-value pairs.
3465static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3466 const StringRef LiteralName) {
3467 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3468 MDNode *Node = HiPELiteralsMD->getOperand(i);
3469 if (Node->getNumOperands() != 2)
3470 continue;
3471 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3472 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3473 if (!NodeName || !NodeVal)
3474 continue;
3475 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3476 if (ValConst && NodeName->getString() == LiteralName) {
3477 return ValConst->getZExtValue();
3478 }
3479 }
3480
3481 report_fatal_error("HiPE literal " + LiteralName +
3482 " required but not provided");
3483}
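For context, here is a minimal sketch of how a producer could emit the "hipe.literals" metadata that this helper consumes. The literal names match the lookups in this file, but the helper name and the example value below are illustrative assumptions, not part of HiPE or this file:

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"

// Hypothetical helper: append one {name, value} pair to !hipe.literals.
static void addHiPELiteral(llvm::Module &M, llvm::StringRef Name,
                           uint64_t Value) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("hipe.literals");
  llvm::Metadata *Pair[] = {
      llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Value))};
  MD->addOperand(llvm::MDNode::get(Ctx, Pair));
}

// Usage (the value is a made-up example): addHiPELiteral(M, "P_NSP_LIMIT", 152);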
3484
3485// Return true if there are no non-ehpad successors to MBB and there are no
3486// non-meta instructions between MBBI and MBB.end().
3487 static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3488 MachineBasicBlock::const_iterator MBBI) {
3489 return llvm::all_of(
3490 MBB.successors(),
3491 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3492 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3493 return MI.isMetaInstruction();
3494 });
3495}
3496
3497/// Erlang programs may need a special prologue to handle the stack size they
3498 /// might need at runtime. That is because Erlang/OTP does not implement a C
3499 /// stack but uses a custom implementation of a hybrid stack/heap architecture.
3500/// (for more information see Eric Stenman's Ph.D. thesis:
3501/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3502///
3503/// CheckStack:
3504/// temp0 = sp - MaxStack
3505/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3506/// OldStart:
3507/// ...
3508/// IncStack:
3509/// call inc_stack # doubles the stack space
3510/// temp0 = sp - MaxStack
3511/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3512 void X86FrameLowering::adjustForHiPEPrologue(
3513 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3514 MachineFrameInfo &MFI = MF.getFrameInfo();
3515 DebugLoc DL;
3516
3517 // To support shrink-wrapping we would need to insert the new blocks
3518 // at the right place and update the branches to PrologueMBB.
3519 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3520
3521 // HiPE-specific values
3522 NamedMDNode *HiPELiteralsMD =
3523 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3524 if (!HiPELiteralsMD)
3526 "Can't generate HiPE prologue without runtime parameters");
3527 const unsigned HipeLeafWords = getHiPELiteral(
3528 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3529 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3530 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3531 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3532 ? MF.getFunction().arg_size() - CCRegisteredArgs
3533 : 0;
3534 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3535
3537 "HiPE prologue is only supported on Linux operating systems.");
3538
3539 // Compute the largest caller's frame that is needed to fit the callees'
3540 // frames. This 'MaxStack' is computed from:
3541 //
3542 // a) the fixed frame size, which is the space needed for all spilled temps,
3543 // b) outgoing on-stack parameter areas, and
3544 // c) the minimum stack space this function needs to make available for the
3545 // functions it calls (a tunable ABI property).
3546 if (MFI.hasCalls()) {
3547 unsigned MoreStackForCalls = 0;
3548
3549 for (auto &MBB : MF) {
3550 for (auto &MI : MBB) {
3551 if (!MI.isCall())
3552 continue;
3553
3554 // Get callee operand.
3555 const MachineOperand &MO = MI.getOperand(0);
3556
3557 // Only take account of global function calls (no closures etc.).
3558 if (!MO.isGlobal())
3559 continue;
3560
3561 const Function *F = dyn_cast<Function>(MO.getGlobal());
3562 if (!F)
3563 continue;
3564
3565 // Do not update 'MaxStack' for primitive and built-in functions
3566 // (encoded with names that either start with "erlang."/"bif_", or
3567 // contain neither a "." (as a simple <Module>.<Function>.<Arity>
3568 // would) nor an "_" (as the BIF "suspend_0" does)), as they are
3569 // executed on another stack.
3570 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3571 F->getName().find_first_of("._") == StringRef::npos)
3572 continue;
3573
3574 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3575 ? F->arg_size() - CCRegisteredArgs
3576 : 0;
3577 if (HipeLeafWords - 1 > CalleeStkArity)
3578 MoreStackForCalls =
3579 std::max(MoreStackForCalls,
3580 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
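// Worked example with assumed numbers: on AMD64 (CCRegisteredArgs = 6,
// HipeLeafWords = 24, SlotSize = 8), a callee taking 7 arguments has
// CalleeStkArity = 1 and would contribute (24 - 1 - 1) * 8 = 176 bytes
// to MoreStackForCalls.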
3581 }
3582 }
3583 MaxStack += MoreStackForCalls;
3584 }
3585
3586 // If the stack frame needed is larger than the guaranteed amount, then runtime
3587 // checks and calls to the "inc_stack_0" BIF are inserted in the assembly prologue.
3588 if (MaxStack > Guaranteed) {
3589 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3590 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3591
3592 for (const auto &LI : PrologueMBB.liveins()) {
3593 stackCheckMBB->addLiveIn(LI);
3594 incStackMBB->addLiveIn(LI);
3595 }
3596
3597 MF.push_front(incStackMBB);
3598 MF.push_front(stackCheckMBB);
3599
3600 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3601 unsigned LEAop, CMPop, CALLop;
3602 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3603 if (Is64Bit) {
3604 SPReg = X86::RSP;
3605 PReg = X86::RBP;
3606 LEAop = X86::LEA64r;
3607 CMPop = X86::CMP64rm;
3608 CALLop = X86::CALL64pcrel32;
3609 } else {
3610 SPReg = X86::ESP;
3611 PReg = X86::EBP;
3612 LEAop = X86::LEA32r;
3613 CMPop = X86::CMP32rm;
3614 CALLop = X86::CALLpcrel32;
3615 }
3616
3617 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3618 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3619 "HiPE prologue scratch register is live-in");
3620
3621 // Create new MBB for StackCheck:
3622 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3623 false, -MaxStack);
3624 // SPLimitOffset is in a fixed heap location (pointed by BP).
3625 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3626 PReg, false, SPLimitOffset);
3627 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3628 .addMBB(&PrologueMBB)
3629 .addImm(X86::COND_AE);
3630
3631 // Create new MBB for IncStack:
3632 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3633 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3634 false, -MaxStack);
3635 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3636 PReg, false, SPLimitOffset);
3637 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3638 .addMBB(incStackMBB)
3639 .addImm(X86::COND_LE);
3640
3641 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3642 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3643 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3644 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3645 }
3646#ifdef EXPENSIVE_CHECKS
3647 MF.verify();
3648#endif
3649}
3650
3651bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3652 MachineBasicBlock::iterator MBBI,
3653 const DebugLoc &DL,
3654 int Offset) const {
3655 if (Offset <= 0)
3656 return false;
3657
3658 if (Offset % SlotSize)
3659 return false;
3660
3661 int NumPops = Offset / SlotSize;
3662 // This is only worth it if we have at most 2 pops.
3663 if (NumPops != 1 && NumPops != 2)
3664 return false;
3665
3666 // Handle only the trivial case where the adjustment directly follows
3667 // a call. This is the most common one, anyway.
3668 if (MBBI == MBB.begin())
3669 return false;
3670 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3671 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3672 return false;
3673
3674 unsigned Regs[2];
3675 unsigned FoundRegs = 0;
3676
3677 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3678 const MachineOperand &RegMask = Prev->getOperand(1);
3679
3680 auto &RegClass =
3681 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3682 // Try to find up to NumPops free registers.
3683 for (auto Candidate : RegClass) {
3684 // Poor man's liveness:
3685 // Since we're immediately after a call, any register that is clobbered
3686 // by the call and not defined by it can be considered dead.
3687 if (!RegMask.clobbersPhysReg(Candidate))
3688 continue;
3689
3690 // Don't clobber reserved registers
3691 if (MRI.isReserved(Candidate))
3692 continue;
3693
3694 bool IsDef = false;
3695 for (const MachineOperand &MO : Prev->implicit_operands()) {
3696 if (MO.isReg() && MO.isDef() &&
3697 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3698 IsDef = true;
3699 break;
3700 }
3701 }
3702
3703 if (IsDef)
3704 continue;
3705
3706 Regs[FoundRegs++] = Candidate;
3707 if (FoundRegs == (unsigned)NumPops)
3708 break;
3709 }
3710
3711 if (FoundRegs == 0)
3712 return false;
3713
3714 // If we found only one free register, but need two, reuse the same one twice.
3715 while (FoundRegs < (unsigned)NumPops)
3716 Regs[FoundRegs++] = Regs[0];
3717
3718 for (int i = 0; i < NumPops; ++i)
3719 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3720 Regs[i]);
3721
3722 return true;
3723}
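For intuition, a hedged before/after sketch of the rewrite this helper performs on 32-bit code compiled for minimum size; the popped register is whichever call-clobbered, unreserved register the scan above finds, so ecx below is only an assumption:

//   before:  calll _foo          after:  calll _foo
//            addl  $8, %esp              popl  %ecx
//                                        popl  %ecx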
3724
3725 MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3726 MachineFunction &MF, MachineBasicBlock &MBB,
3727 MachineBasicBlock::iterator I) const {
3728 bool reserveCallFrame = hasReservedCallFrame(MF);
3729 unsigned Opcode = I->getOpcode();
3730 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3731 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3732 uint64_t Amount = TII.getFrameSize(*I);
3733 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3734 I = MBB.erase(I);
3735 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3736
3737 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3738 // typically because the function is marked noreturn (abort, throw,
3739 // assert_fail, etc).
3740 if (isDestroy && blockEndIsUnreachable(MBB, I))
3741 return I;
3742
3743 if (!reserveCallFrame) {
3744 // If the stack pointer can be changed after the prologue, turn the
3745 // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
3746 // adjcallstackup instruction into an 'add ESP, <amt>'.
3747
3748 // We need to keep the stack aligned properly. To do this, we round the
3749 // amount of space needed for the outgoing arguments up to the next
3750 // alignment boundary.
3751 Amount = alignTo(Amount, getStackAlign());
3752
3753 const Function &F = MF.getFunction();
3754 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3755 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3756
3757 // If we have any exception handlers in this function, and we adjust
3758 // the SP before calls, we may need to indicate this to the unwinder
3759 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3760 // Amount == 0, because the preceding function may have set a non-0
3761 // GNU_ARGS_SIZE.
3762 // TODO: We don't need to reset this between subsequent functions,
3763 // if it didn't change.
3764 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3765
3766 if (HasDwarfEHHandlers && !isDestroy &&
3767 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3768 BuildCFI(MBB, InsertPos, DL,
3769 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3770
3771 if (Amount == 0)
3772 return I;
3773
3774 // Factor out the amount that gets handled inside the sequence
3775 // (Pushes of argument for frame setup, callee pops for frame destroy)
3776 Amount -= InternalAmt;
3777
3778 // TODO: This is needed only if we require precise CFA.
3779 // If this is a callee-pop calling convention, emit a CFA adjust for
3780 // the amount the callee popped.
3781 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3782 BuildCFI(MBB, InsertPos, DL,
3783 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3784
3785 // Add Amount to SP to destroy a frame, or subtract to setup.
3786 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3787
3788 if (StackAdjustment) {
3789 // Merge with any previous or following adjustment instruction. Note: the
3790 // instructions merged with here do not have CFI, so their stack
3791 // adjustments do not feed into CfaAdjustment.
3792 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3793 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3794
3795 if (StackAdjustment) {
3796 if (!(F.hasMinSize() &&
3797 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3798 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3799 /*InEpilogue=*/false);
3800 }
3801 }
3802
3803 if (DwarfCFI && !hasFP(MF)) {
3804 // If we don't have FP, but need to generate unwind information,
3805 // we need to set the correct CFA offset after the stack adjustment.
3806 // How much we adjust the CFA offset depends on whether we're emitting
3807 // CFI only for EH purposes or for debugging. EH only requires the CFA
3808 // offset to be correct at each call site, while for debugging we want
3809 // it to be more precise.
3810
3811 int64_t CfaAdjustment = -StackAdjustment;
3812 // TODO: When not using precise CFA, we also need to adjust for the
3813 // InternalAmt here.
3814 if (CfaAdjustment) {
3815 BuildCFI(
3816 MBB, InsertPos, DL,
3817 MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3818 }
3819 }
3820
3821 return I;
3822 }
3823
3824 if (InternalAmt) {
3825 MachineBasicBlock::iterator CI = I;
3826 MachineBasicBlock::iterator B = MBB.begin();
3827 while (CI != B && !std::prev(CI)->isCall())
3828 --CI;
3829 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3830 }
3831
3832 return I;
3833}
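As a hedged illustration of the non-reserved-call-frame path above: on x86-64, the call frame pseudos bracketing a call with a 16-byte outgoing argument area lower roughly as follows (pseudo operands simplified):

//   ADJCALLSTACKDOWN64 16, 0, 0   -->   subq $16, %rsp
//   CALL64pcrel32 @callee         -->   callq callee
//   ADJCALLSTACKUP64 16, 0        -->   addq $16, %rsp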
3834
3835 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3836 assert(MBB.getParent() && "Block is not attached to a function!");
3837 const MachineFunction &MF = *MBB.getParent();
3838 if (!MBB.isLiveIn(X86::EFLAGS))
3839 return true;
3840
3841 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3842 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3843 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3844 const X86TargetLowering &TLI = *STI.getTargetLowering();
3845 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3846 return false;
3847
3848 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3849 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3850}
3851
3852 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3853 assert(MBB.getParent() && "Block is not attached to a function!");
3854
3855 // Win64 has strict requirements on epilogues, and we are not taking
3856 // chances by messing with them.
3857 // I.e., unless this block is already an exit block, we can't use
3858 // it as an epilogue.
3859 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3860 return false;
3861
3862 // Swift async context epilogue has a BTR instruction that clobbers parts of
3863 // EFLAGS.
3864 const MachineFunction &MF = *MBB.getParent();
3865 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3866 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3867
3868 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3869 return true;
3870
3871 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3872 // clobbers the EFLAGS. Check that we do not need to preserve it,
3873 // otherwise, conservatively assume this is not
3874 // safe to insert the epilogue here.
3875 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3876}
3877
3878 bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3879 // If we may need to emit frameless compact unwind information, give
3880 // up as this is currently broken: PR25614.
3881 bool CompactUnwind =
3882 MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
3883 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3884 !CompactUnwind) &&
3885 // The lowering of segmented stack and HiPE only support entry
3886 // blocks as prologue blocks: PR26107. This limitation may be
3887 // lifted if we fix:
3888 // - adjustForSegmentedStacks
3889 // - adjustForHiPEPrologue
3890 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3891 !MF.shouldSplitStack();
3892}
3893
3894 MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3895 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3896 const DebugLoc &DL, bool RestoreSP) const {
3897 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3898 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3899 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3900 "restoring EBP/ESI on non-32-bit target");
3901
3902 MachineFunction &MF = *MBB.getParent();
3903 Register FramePtr = TRI->getFrameRegister(MF);
3904 Register BasePtr = TRI->getBaseRegister();
3905 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3906 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3907 MachineFrameInfo &MFI = MF.getFrameInfo();
3908
3909 // FIXME: Don't set FrameSetup flag in catchret case.
3910
3911 int FI = FuncInfo.EHRegNodeFrameIndex;
3912 int EHRegSize = MFI.getObjectSize(FI);
3913
3914 if (RestoreSP) {
3915 // MOV32rm -EHRegSize(%ebp), %esp
3916 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3917 X86::EBP, true, -EHRegSize)
3918 .setMIFlag(MachineInstr::FrameSetup);
3919 }
3920
3921 Register UsedReg;
3922 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3923 int EndOffset = -EHRegOffset - EHRegSize;
3924 FuncInfo.EHRegNodeEndOffset = EndOffset;
3925
3926 if (UsedReg == FramePtr) {
3927 // ADD $offset, %ebp
3928 unsigned ADDri = getADDriOpcode(false);
3929 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3930 .addReg(FramePtr)
3931 .addImm(EndOffset)
3932 .setMIFlag(MachineInstr::FrameSetup)
3933 ->getOperand(3)
3934 .setIsDead();
3935 assert(EndOffset >= 0 &&
3936 "end of registration object above normal EBP position!");
3937 } else if (UsedReg == BasePtr) {
3938 // LEA offset(%ebp), %esi
3939 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3940 FramePtr, false, EndOffset)
3941 .setMIFlag(MachineInstr::FrameSetup);
3942 // MOV32rm SavedEBPOffset(%esi), %ebp
3943 assert(X86FI->getHasSEHFramePtrSave());
3944 int Offset =
3945 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3946 .getFixed();
3947 assert(UsedReg == BasePtr);
3948 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3949 UsedReg, true, Offset)
3950 .setMIFlag(MachineInstr::FrameSetup);
3951 } else {
3952 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3953 }
3954 return MBBI;
3955}
3956
3957 int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3958 return TRI->getSlotSize();
3959}
3960
3961 Register
3962 X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3963 return StackPtr;
3964}
3965
3966 TargetFrameLowering::DwarfFrameBase
3967 X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
3968 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3969 Register FrameRegister = RI->getFrameRegister(MF);
3970 if (getInitialCFARegister(MF) == FrameRegister &&
3971 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
3972 DwarfFrameBase FrameBase;
3973 FrameBase.Kind = DwarfFrameBase::CFA;
3974 FrameBase.Location.Offset =
3975 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
3976 return FrameBase;
3977 }
3978
3979 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
3980}
3981
3982namespace {
3983// Struct used by orderFrameObjects to help sort the stack objects.
3984struct X86FrameSortingObject {
3985 bool IsValid = false; // true if we care about this Object.
3986 unsigned ObjectIndex = 0; // Index of Object into MFI list.
3987 unsigned ObjectSize = 0; // Size of Object in bytes.
3988 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3989 unsigned ObjectNumUses = 0; // Object static number of uses.
3990};
3991
3992// The comparison function we use for std::sort to order our local
3993// stack symbols. The current algorithm is to use an estimated
3994// "density". This takes into consideration the size and number of
3995// uses each object has in order to roughly minimize code size.
3996// So, for example, an object of size 16B that is referenced 5 times
3997// will get higher priority than 4 4B objects referenced 1 time each.
3998 // It's not perfect, and we may be able to squeeze a few more bytes out of
3999 // it (for example: 0(esp) requires fewer bytes, and symbols allocated at
4000 // the fringe end can have special consideration, given that their size is
4001 // less important, etc.), but the algorithmic complexity grows too much to
4002 // be worth the extra gains we get. This gets us pretty close.
4003// The final order leaves us with objects with highest priority going
4004// at the end of our list.
4005struct X86FrameSortingComparator {
4006 inline bool operator()(const X86FrameSortingObject &A,
4007 const X86FrameSortingObject &B) const {
4008 uint64_t DensityAScaled, DensityBScaled;
4009
4010 // For consistency in our comparison, all invalid objects are placed
4011 // at the end. This also allows us to stop walking when we hit the
4012 // first invalid item after it's all sorted.
4013 if (!A.IsValid)
4014 return false;
4015 if (!B.IsValid)
4016 return true;
4017
4018 // The density is calculated by doing :
4019 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4020 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4021 // Since this approach may cause inconsistencies in
4022 // the floating point <, >, == comparisons, depending on the floating
4023 // point model with which the compiler was built, we're going
4024 // to scale both sides by multiplying with
4025 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4026 // the division and, with it, the need for any floating point
4027 // arithmetic.
4028 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4029 static_cast<uint64_t>(B.ObjectSize);
4030 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4031 static_cast<uint64_t>(A.ObjectSize);
4032
4033 // If the two densities are equal, prioritize highest alignment
4034 // objects. This allows for similar alignment objects
4035 // to be packed together (given the same density).
4036 // There's room for improvement here, also, since we can pack
4037 // similar alignment (different density) objects next to each
4038 // other to save padding. This will also require further
4039 // complexity/iterations, and the overall gain isn't worth it,
4040 // in general. Something to keep in mind, though.
4041 if (DensityAScaled == DensityBScaled)
4042 return A.ObjectAlignment < B.ObjectAlignment;
4043
4044 return DensityAScaled < DensityBScaled;
4045 }
4046};
4047} // namespace
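To make the scaling trick concrete, suppose A = {ObjectSize 16, 5 uses} and B = {ObjectSize 4, 1 use} (values assumed for illustration). Instead of comparing 5/16 = 0.3125 against 1/4 = 0.25 in floating point, the comparator computes:

//   DensityAScaled = 5 * 4  = 20
//   DensityBScaled = 1 * 16 = 16
//
// 20 > 16, so operator()(A, B) returns false and the denser object A sorts
// after B, i.e. the highest-priority object lands at the end of the list.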
4048
4049// Order the symbols in the local stack.
4050// We want to place the local stack objects in some sort of sensible order.
4051// The heuristic we use is to try and pack them according to static number
4052// of uses and size of object in order to minimize code size.
4053 void X86FrameLowering::orderFrameObjects(
4054 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4055 const MachineFrameInfo &MFI = MF.getFrameInfo();
4056
4057 // Don't waste time if there's nothing to do.
4058 if (ObjectsToAllocate.empty())
4059 return;
4060
4061 // Create an array of all MFI objects. We won't need all of these
4062 // objects, but we're going to create a full array of them to make
4063 // it easier to index into when we're counting "uses" down below.
4064 // We want to be able to easily/cheaply access an object by simply
4065 // indexing into it, instead of having to search for it every time.
4066 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4067
4068 // Walk the objects we care about and mark them as such in our working
4069 // struct.
4070 for (auto &Obj : ObjectsToAllocate) {
4071 SortingObjects[Obj].IsValid = true;
4072 SortingObjects[Obj].ObjectIndex = Obj;
4073 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4074 // Set the size.
4075 int ObjectSize = MFI.getObjectSize(Obj);
4076 if (ObjectSize == 0)
4077 // Variable size. Just use 4.
4078 SortingObjects[Obj].ObjectSize = 4;
4079 else
4080 SortingObjects[Obj].ObjectSize = ObjectSize;
4081 }
4082
4083 // Count the number of uses for each object.
4084 for (auto &MBB : MF) {
4085 for (auto &MI : MBB) {
4086 if (MI.isDebugInstr())
4087 continue;
4088 for (const MachineOperand &MO : MI.operands()) {
4089 // Check to see if it's a local stack symbol.
4090 if (!MO.isFI())
4091 continue;
4092 int Index = MO.getIndex();
4093 // Check to see if it falls within our range, and is tagged
4094 // to require ordering.
4095 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4096 SortingObjects[Index].IsValid)
4097 SortingObjects[Index].ObjectNumUses++;
4098 }
4099 }
4100 }
4101
4102 // Sort the objects using X86FrameSortingComparator (see its comment for
4103 // info).
4104 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4105
4106 // Now modify the original list to represent the final order that
4107 // we want. The order will depend on whether we're going to access them
4108 // from the stack pointer or the frame pointer. For SP, the objects we
4109 // want at smaller offsets should end up at the END of the list.
4110 // For FP, the order should be flipped.
4111 int i = 0;
4112 for (auto &Obj : SortingObjects) {
4113 // All invalid items are sorted at the end, so it's safe to stop.
4114 if (!Obj.IsValid)
4115 break;
4116 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4117 }
4118
4119 // Flip it if we're accessing off of the FP.
4120 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4121 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4122}
4123
4124unsigned
4125 X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4126 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4127 unsigned Offset = 16;
4128 // RBP is immediately pushed.
4129 Offset += SlotSize;
4130 // All callee-saved registers are then pushed.
4131 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4132 // Every funclet allocates enough stack space for the largest outgoing call.
4133 Offset += getWinEHFuncletFrameSize(MF);
4134 return Offset;
4135}
4136
4137 void X86FrameLowering::processFunctionBeforeFrameFinalized(
4138 MachineFunction &MF, RegScavenger *RS) const {
4139 // Mark the function as not having WinCFI. We will set it back to true in
4140 // emitPrologue if it gets called and emits CFI.
4141 MF.setHasWinCFI(false);
4142
4143 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4144 // aligned. The format doesn't support misaligned stack adjustments.
4145 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4146 MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
4147
4148 // If this function isn't doing Win64-style C++ EH, we don't need to do
4149 // anything.
4150 if (STI.is64Bit() && MF.hasEHFunclets() &&
4151 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4152 EHPersonality::MSVC_CXX) {
4153 adjustFrameForMsvcCxxEh(MF);
4154 }
4155}
4156
4157void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4158 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4159 // relative to RSP after the prologue. Find the offset of the last fixed
4160 // object, so that we can allocate a slot immediately following it. If there
4161 // were no fixed objects, use offset -SlotSize, which is immediately after the
4162 // return address. Fixed objects have negative frame indices.
4163 MachineFrameInfo &MFI = MF.getFrameInfo();
4164 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4165 int64_t MinFixedObjOffset = -SlotSize;
4166 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4167 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4168
4169 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4170 for (WinEHHandlerType &H : TBME.HandlerArray) {
4171 int FrameIndex = H.CatchObj.FrameIndex;
4172 if (FrameIndex != INT_MAX) {
4173 // Ensure alignment.
4174 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4175 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4176 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4177 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4178 }
4179 }
4180 }
4181
4182 // Ensure alignment.
4183 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4184 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
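// Worked example with an assumed layout: if the lowest fixed object ends at
// MinFixedObjOffset = -20, the alignment step rounds it down to -24 (since
// 20 % 8 == 4), and UnwindHelp is then placed one slot lower, at -32.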
4185 int UnwindHelpFI =
4186 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4187 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4188
4189 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4190 // other frame setup instructions.
4191 MachineBasicBlock &MBB = MF.front();
4192 auto MBBI = MBB.begin();
4193 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4194 ++MBBI;
4195
4196 DebugLoc DL = MBB.findDebugLoc(MBBI);
4197 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4198 UnwindHelpFI)
4199 .addImm(-2);
4200}
4201
4202 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
4203 MachineFunction &MF, RegScavenger *RS) const {
4204 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4205
4206 if (STI.is32Bit() && MF.hasEHFunclets())
4207 restoreWinEHStackPointersInParent(MF);
4208 // We have emitted the prolog and epilog. The stack pointer saving
4209 // instruction is no longer needed.
4210 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4211 MI->eraseFromParent();
4212 X86FI->setStackPtrSaveMI(nullptr);
4213 }
4214}
4215
4216 void X86FrameLowering::restoreWinEHStackPointersInParent(
4217 MachineFunction &MF) const {
4218 // 32-bit functions have to restore stack pointers when control is transferred
4219 // back to the parent function. These blocks are identified as eh pads that
4220 // are not funclet entries.
4221 bool IsSEH = isAsynchronousEHPersonality(
4222 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4223 for (MachineBasicBlock &MBB : MF) {
4224 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4225 if (NeedsRestore)
4226 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4227 /*RestoreSP=*/IsSEH);
4228 }
4229}
unsigned const MachineRegisterInfo * MRI
static bool isFuncletReturnInstr(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Module.h This file contains the declarations for the Module class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:157
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
reverse_iterator rbegin() const
Definition: ArrayRef.h:156
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:868
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1963
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
size_t arg_size() const
Definition: Function.h:864
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:673
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:225
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:719
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:793
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:565
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:670
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:633
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:558
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:600
OpType getOperation() const
Definition: MCDwarf.h:680
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:573
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:664
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:581
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1067
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:610
Machine Value Type.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:572
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:262
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:585
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
A tuple of MDNodes.
Definition: Metadata.h:1729
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1381
unsigned getNumOperands() const
Definition: Metadata.cpp:1377
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static constexpr size_t npos
Definition: StringRef.h:52
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:626
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:560
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:450
Value * getValue() const
Definition: Metadata.h:490
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value i.e.
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value i.e.
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const
Check the instruction before/after the passed instruction.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
Definition: X86InstrInfo.h:215
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" ...
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns the physical register used as the frame pointer.
Register getBaseRegister() const
bool isOSWindows() const
Definition: X86Subtarget.h:322
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:118
bool isTargetDragonFly() const
Definition: X86Subtarget.h:282
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:300
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:280
bool isTargetWin64() const
Definition: X86Subtarget.h:324
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:178
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP,...
Definition: X86Subtarget.h:386
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:304
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:337
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:281
bool isTargetNaCl64() const
Definition: X86Subtarget.h:296
bool isTargetWin32() const
Definition: X86Subtarget.h:326
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:218
bool isTargetLinux() const
Definition: X86Subtarget.h:290
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
CallingConvention
Definition: Dwarf.h:738
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
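The range wrappers drop the begin()/end() boilerplate; a self-contained example:
  #include "llvm/ADT/STLExtras.h"
  #include <vector>

  std::vector<int> SlotSizes = {8, 16, 32};
  bool AllPositive = llvm::all_of(SlotSizes, [](int S) { return S > 0; }); // true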
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
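A hedged sketch of the builder pattern (TII, MBB, MBBI and DL are assumed from a surrounding pass): emit "mov rbp, rsp" and tag it as frame setup.
  BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), X86::RBP)
      .addReg(X86::RSP)
      .setMIFlag(MachineInstr::FrameSetup);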
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
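For example:
  #include "llvm/Support/Alignment.h"

  bool A = llvm::isAligned(llvm::Align(16), 48); // true: 48 is a multiple of 16
  bool B = llvm::isAligned(llvm::Align(16), 40); // false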
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
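Size is in bits, so for example:
  MCRegister R32 = getX86SubSuperRegister(X86::RAX, 32);               // X86::EAX
  MCRegister R8H = getX86SubSuperRegister(X86::RAX, 8, /*High=*/true); // X86::AH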
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
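A hedged sketch (the opcode, DestReg and iterators are assumptions from a typical restore path): reload a GPR from frame index FI by appending the five memory operands for the slot.
  addFrameReference(
      BuildMI(MBB, MI, DL, TII.get(X86::MOV64rm), DestReg), FI);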
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:547
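For example:
  uint64_t P = llvm::alignDown(1000, 16); // 992, the largest multiple of 16 <= 1000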
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
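A sketch, assuming an MBB in scope: land on the first real instruction before inspecting opcodes, so DBG_VALUEs don't perturb the check.
  auto I = skipDebugInstructionsForward(MBB.begin(), MBB.end());
  if (I != MBB.end() && I->getOpcode() == X86::RET64) {
    // ... epilogue-specific handling ...
  }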
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
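A hedged sketch (ScratchReg and the surrounding context are assumptions): materialize SP + 16 with an LEA, the addressing pattern canUseLEAForSPInEpilogue is concerned with.
  addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), ScratchReg),
               X86::RSP, /*isKill=*/false, /*Offset=*/16);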
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
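These helpers turn a bool into the matching operand flag, avoiding if/else around addReg. A sketch, assuming a prologue context where CanKill says whether the register dies at the push:
  BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
      .addReg(Reg, getKillRegState(CanKill))
      .setMIFlag(MachineInstr::FrameSetup);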
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
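For example, rounding a raw frame size up to a 16-byte stack alignment:
  #include "llvm/Support/Alignment.h"

  uint64_t FrameSize = llvm::alignTo(52, llvm::Align(16)); // 64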
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
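Together with classifyEHPersonality above, a sketch (F assumed to be the current llvm::Function) of how frame lowering might detect SEH-style personalities:
  if (F.hasPersonalityFn()) {
    EHPersonality Pers = classifyEHPersonality(F.getPersonalityFn());
    if (isAsynchronousEHPersonality(Pers)) {
      // Asynchronous (SEH) unwinding: any instruction may fault.
    }
  }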
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
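A self-contained example using the classic DWARF test value 624485, which encodes to the three bytes 0xE5 0x8E 0x26 (encodeSLEB128 is the signed counterpart):
  #include "llvm/Support/LEB128.h"
  #include "llvm/Support/raw_ostream.h"
  #include <string>

  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  unsigned Len = llvm::encodeULEB128(624485, OS); // Len == 3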
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
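A hedged sketch, assuming new blocks were split out of the prologue (as the inline stack-probe loop does):
  // LoopMBB and ExitMBB are hypothetical blocks created during expansion.
  fullyRecomputeLiveIns({LoopMBB, ExitMBB, &PrologMBB});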
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
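A hedged sketch (MF and FI assumed from a spill/restore context): attach a memory operand describing an 8-byte load from the slot.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
      /*Size=*/8, llvm::Align(8));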
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
union llvm::TargetFrameLowering::DwarfFrameBase::@241 Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76