//===-- X86FrameLowering.cpp - X86 Frame Information ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}

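// Note: for unsigned 32-bit values, getMOVriOpcode above deliberately picks
// MOV32ri64. A write to a 32-bit register implicitly zero-extends into the
// full 64-bit register, so the 5-byte 32-bit move replaces the 10-byte
// movabsq encoding of MOV64ri.
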
// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching PUSH
// and POP instructions, without going through memory or through the training
// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
// memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}

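// Example: on a 64-bit subtarget with PPX, a balanced callee-saved
// save/restore pair for RBX is emitted as "pushp %rbx" ... "popp %rbx";
// without PPX the same pair degrades to plain "pushq"/"popq", with identical
// architectural semantics either way.
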
static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    unsigned Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case, if the eflags is live-in of the region
/// composed by the terminators or live-out of that region, without
/// being defined by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by a previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
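
// Example: a block whose terminators are "JCC_1 %bb.then, <cond>, implicit
// $eflags; JMP_1 %bb.else", with the flags produced by a CMP before the
// terminator region, has EFLAGS live-in to that region, so this returns true
// and the caller must adjust SP with LEA instead of ADD/SUB.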

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);

    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}
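
// For instance, a prologue call emitSPUpdate(..., NumBytes = -40, ...) on
// x86-64 normally becomes a single "subq $40, %rsp", while an adjustment of
// exactly SlotSize is emitted as a lone push/pop since that encoding is
// smaller.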

MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}
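
// Example: if the instruction preceding MBBI is "subq $16, %rsp",
// mergeSPUpdates(MBB, MBBI, /*doMergeWithPrevious=*/true) erases that SUB
// (and its paired CFI update, if present) and returns -16, which the caller
// folds into a single combined SP adjustment.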

void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        // | retaddr |
        // | ebp     |
        // |         |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that at most MaxAlign % StackProbeSize bytes
  // are left between the unaligned rsp and the current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // Unless the offset is so small that it fits within a page (in which case
  // there is nothing extra to do), allocate and probe the first page.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a Page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust Dwarf CFA offset here, the last position of the stack
  // has been defined.
}
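
// Example: with a 4096-byte probe interval, a 10000-byte allocation unrolls
// roughly to:
//   subq $4096, %rsp ; movl $0, (%rsp)   // first page
//   subq $4096, %rsp ; movl $0, (%rsp)   // second page
//   subq $1808, %rsp                     // tail, needs no probe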

void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);

  // save loop bound
  {
    const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
        .addReg(FinalStackProbed)
        .addImm(BoundOffset)
        .setMIFlag(MachineInstr::FrameSetup);

    // while in the loop, use loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d, we must use r11 instead
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // allocate a page
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // handle tail
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // after the loop, switch back to stack pointer for CFI
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp, we must use rsp instead
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update Live In information
  fullyRecomputeLiveIns({tailMBB, testMBB});
}
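
// The synthesized probe loop is roughly:
//   movq %rsp, %r11
//   subq $BoundOffset, %r11      // Offset rounded down to StackProbeSize
// .LtestMBB:
//   subq $StackProbeSize, %rsp   // allocate one page
//   movl $0, (%rsp)              // touch it
//   cmpq %r11, %rsp
//   jne  .LtestMBB
//   subq $TailOffset, %rsp       // remainder, needs no probe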

void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if both
    // need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  ContinueMBB->addLiveIn(SizeReg);
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && IsLargeCodeModel) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
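
// For example, calculateSetFPREG(40) == 32: the SP adjustment is clamped to
// at most 128 and then rounded down to a 16-byte multiple, as required by
// UWOP_SET_FPREG.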

// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}
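
// Example: a 64-bit function carrying the "stackrealign" attribute that makes
// calls still reports at least the 16-byte ABI stack alignment here, even if
// its own locals only need 8-byte alignment, so outgoing call frames stay
// correctly aligned.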

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in the worst case) at most StackProbeSize
  // bytes are left unprobed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump to the footer if StackPtr < FinalStackProbed
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // setup loop body
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // cmp with stack pointer bound
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump back while FinalStackProbed < StackPtr
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}
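
// When inline probing is needed, the realignment above expands roughly to:
//   movq %rsp, %r11
//   andq $-MaxAlign, %r11        // aligned target in a scratch register
//   cmpq %rsp, %r11
//   je   .Lcontinue              // already aligned: no probing needed
// .Lhead:
//   subq $StackProbeSize, %rsp
//   cmpq %r11, %rsp
//   jb   .Lfoot                  // stepped past the target
// .Lbody:
//   movl $0, (%rsp)              // probe the page
//   subq $StackProbeSize, %rsp
//   cmpq %rsp, %r11
//   jb   .Lbody
// .Lfoot:
//   movq %r11, %rsp              // land exactly on the aligned target
//   movl $0, (%rsp)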

bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}
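
// Example: under the SysV AMD64 ABI, a leaf function with 40 bytes of locals
// can address them at -40(%rsp) without ever moving RSP, because the 128
// bytes below RSP are guaranteed not to be clobbered asynchronously.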

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions.
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
          .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub %rax, %rsp
  [else]
      sub $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

void X86FrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &Fn = MF.getFunction();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
  uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
  bool IsFunclet = MBB.isEHFuncletEntry();
  EHPersonality Personality = EHPersonality::Unknown;
  if (Fn.hasPersonalityFn())
    Personality = classifyEHPersonality(Fn.getPersonalityFn());
  bool FnHasClrFunclet =
      MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
  bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
  bool HasFP = hasFP(MF);
  bool IsWin64Prologue = isWin64Prologue(MF);
  bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
  // FIXME: Emit FPO data for EH funclets.
  bool NeedsWinFPO =
      !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
  bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
  bool NeedsDwarfCFI = needsDwarfCFI(MF);
  Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  Register BasePtr = TRI->getBaseRegister();
  bool HasWinCFI = false;

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;
  Register ArgBaseReg;

  // Emit extra prolog for argument stack slot reference.
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    // MI is the LEA instruction created in X86ArgumentStackSlotPass.
    // Create an extra prolog for stack realignment.
    ArgBaseReg = MI->getOperand(0).getReg();
    // leal 4(%esp), %basereg
    // .cfi_def_cfa %basereg, 0
    // andl $-128, %esp
    // pushl -4(%basereg)
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
            ArgBaseReg)
        .addUse(StackPtr)
        .addImm(1)
        .addUse(X86::NoRegister)
        .addImm(SlotSize)
        .addUse(X86::NoRegister)
        .setMIFlag(MachineInstr::FrameSetup);
    if (NeedsDwarfCFI) {
      // .cfi_def_cfa %basereg, 0
      unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
               MachineInstr::FrameSetup);
    }
    BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
    int64_t Offset = -(int64_t)SlotSize;
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
        .addReg(ArgBaseReg)
        .addImm(1)
        .addReg(X86::NoRegister)
        .addImm(Offset)
        .addReg(X86::NoRegister)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Space reserved for stack-based arguments when making an (ABI-guaranteed)
  // tail call.
  unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
  if (TailCallArgReserveSize && IsWin64Prologue)
    report_fatal_error("Can't handle guaranteed tail call under win64 yet");

  const bool EmitStackProbeCall =
      STI.getTargetLowering()->hasStackProbeSymbol(MF);
  unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);

  if (HasFP && X86FI->hasSwiftAsyncContext()) {
    switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
    case SwiftAsyncFramePointerMode::DeploymentBased:
      if (STI.swiftAsyncContextIsDynamicallySet()) {
        // The special symbol below is absolute and has a *value* suitable to be
        // combined with the frame pointer directly.
        BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
            .addUse(MachineFramePtr)
            .addUse(X86::RIP)
            .addImm(1)
            .addUse(X86::NoRegister)
            .addExternalSymbol("swift_async_extendedFramePointerFlags",
                               X86II::MO_GOTPCREL)
            .addUse(X86::NoRegister);
        break;
      }
      [[fallthrough]];

    case SwiftAsyncFramePointerMode::Always:
      assert(
          !IsWin64Prologue &&
          "win64 prologue does not set the bit 60 in the saved frame pointer");
      BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
          .addUse(MachineFramePtr)
          .addImm(60)
          .setMIFlag(MachineInstr::FrameSetup);
      break;

    case SwiftAsyncFramePointerMode::Never:
      break;
    }
  }

  // Re-align the stack on 64-bit if the x86-interrupt calling convention is
  // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
  // stack alignment.
  if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit &&
      Fn.arg_size() == 2) {
    StackSize += 8;
    MFI.setStackSize(StackSize);

    // Update the stack pointer by pushing a register. This is the instruction
    // that would end up being emitted by a call to `emitSPUpdate`.
    // Hard-coding the update to a push avoids emitting a second
    // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
    // probing isn't needed anyways for an 8-byte update.
    // Pushing a register leaves us in a similar situation to a regular
    // function call where we know that the address at (rsp-8) is writeable.
    // That way we avoid any off-by-ones with stack probing for additional
    // stack pointer updates later on.
    BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
        .addReg(X86::RAX, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
  // function, and use up to 128 bytes of stack space, don't have a frame
  // pointer, calls, or dynamic alloca then we do not need to adjust the
  // stack pointer (we fit in the Red Zone). We also check that we don't
  // push and pop from the stack.
  if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
      !MFI.hasVarSizedObjects() &&             // No dynamic alloca.
      !MFI.adjustsStack() &&                   // No calls.
      !EmitStackProbeCall &&                   // No stack probes.
      !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
      !MF.shouldSplitStack()) {                // Regular stack
    uint64_t MinSize =
        X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
    if (HasFP)
      MinSize += SlotSize;
    X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI.setStackSize(StackSize);
  }

  // Insert stack pointer adjustment for later moving of return addr. Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallArgReserveSize != 0) {
    BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  uint64_t NumBytes = 0;
  int stackGrowth = -SlotSize;

  // Find the funclet establisher parameter
  Register Establisher = X86::NoRegister;
  if (IsClrFunclet)
    Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
  else if (IsFunclet)
    Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;

  if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
    // Immediately spill establisher into the home slot.
    // The runtime cares about this.
    // MOV64mr %rdx, 16(%rsp)
    unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
        .addReg(Establisher)
        .setMIFlag(MachineInstr::FrameSetup);
    MBB.addLiveIn(Establisher);
  }

  if (HasFP) {
    assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");

    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    NumBytes =
        FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);

    // Callee-saved registers are pushed on stack before the stack is realigned.
    if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
      NumBytes = alignTo(NumBytes, MaxAlign);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL,
            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
        .addReg(MachineFramePtr, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
      // Mark the place where EBP/RBP was saved.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::cfiDefCfaOffset(
                   nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
               MachineInstr::FrameSetup);

      // Change the rule for the FramePtr to be an "offset" rule.
      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
                                              2 * stackGrowth -
                                                  (int)TailCallArgReserveSize),
               MachineInstr::FrameSetup);
    }

    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }
1770
1771 if (!IsFunclet) {
1772 if (X86FI->hasSwiftAsyncContext()) {
1773 assert(!IsWin64Prologue &&
1774 "win64 prologue does not store async context right below rbp");
1775 const auto &Attrs = MF.getFunction().getAttributes();
1776
1777 // Before we update the live frame pointer we have to ensure there's a
1778 // valid (or null) asynchronous context in its slot just before FP in
1779 // the frame record, so store it now.
1780 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1781 // We have an initial context in r14, store it just before the frame
1782 // pointer.
1783 MBB.addLiveIn(X86::R14);
1784 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1785 .addReg(X86::R14)
1786 .setMIFlag(MachineInstr::FrameSetup);
1787 } else {
1788 // No initial context, store null so that there's no pointer that
1789 // could be misused.
1790 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1791 .addImm(0)
1792 .setMIFlag(MachineInstr::FrameSetup);
1793 }
1794
1795 if (NeedsWinCFI) {
1796 HasWinCFI = true;
1797 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1798 .addImm(X86::R14)
1799 .setMIFlag(MachineInstr::FrameSetup);
1800 }
1801
1802 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1803 .addUse(X86::RSP)
1804 .addImm(1)
1805 .addUse(X86::NoRegister)
1806 .addImm(8)
1807 .addUse(X86::NoRegister)
1808 .setMIFlag(MachineInstr::FrameSetup);
1809 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1810 .addUse(X86::RSP)
1811 .addImm(8)
1812 .setMIFlag(MachineInstr::FrameSetup);
1813 }
1814
1815 if (!IsWin64Prologue && !IsFunclet) {
1816 // Update EBP with the new base value.
1817 if (!X86FI->hasSwiftAsyncContext())
1818 BuildMI(MBB, MBBI, DL,
1819 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1820 FramePtr)
1821 .addReg(StackPtr)
1822 .setMIFlag(MachineInstr::FrameSetup);
1823
1824 if (NeedsDwarfCFI) {
1825 if (ArgBaseReg.isValid()) {
1826 SmallString<64> CfaExpr;
1827 CfaExpr.push_back(dwarf::DW_CFA_expression);
1828 uint8_t buffer[16];
1829 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1830 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1831 CfaExpr.push_back(2);
1832 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1833 CfaExpr.push_back(0);
1834 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1835 BuildCFI(MBB, MBBI, DL,
1836 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1837 MachineInstr::FrameSetup);
1838 } else {
1839 // Mark effective beginning of when frame pointer becomes valid.
1840 // Define the current CFA to use the EBP/RBP register.
1841 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1842 BuildCFI(
1843 MBB, MBBI, DL,
1844 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1845 MachineInstr::FrameSetup);
1846 }
1847 }
1848
1849 if (NeedsWinFPO) {
1850 // .cv_fpo_setframe $FramePtr
1851 HasWinCFI = true;
1852 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1853 .addImm(FramePtr)
1854 .addImm(0)
1855 .setMIFlag(MachineInstr::FrameSetup);
1856 }
1857 }
1858 }
1859 } else {
1860 assert(!IsFunclet && "funclets without FPs not yet implemented");
1861 NumBytes =
1862 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1863 }
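// A worked example (illustrative, not from the source): with StackSize = 88,
// SlotSize = 8, a 16-byte callee-saved area and no tail-call reserve, the
// frame-pointer case gives FrameSize = 88 - 8 = 80 and NumBytes = 80 - 16 =
// 64, i.e. the stack allocation below only covers what the RBP push and the
// CSR pushes have not already consumed. Without an FP, NumBytes = 88 - 16 =
// 72.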
1864
1865 // Update the offset adjustment, which is mainly used by codeview to translate
1866 // from ESP to VFRAME relative local variable offsets.
1867 if (!IsFunclet) {
1868 if (HasFP && TRI->hasStackRealignment(MF))
1869 MFI.setOffsetAdjustment(-NumBytes);
1870 else
1871 MFI.setOffsetAdjustment(-StackSize);
1872 }
1873
1874 // For EH funclets, only allocate enough space for outgoing calls. Save the
1875 // NumBytes value that we would've used for the parent frame.
1876 unsigned ParentFrameNumBytes = NumBytes;
1877 if (IsFunclet)
1878 NumBytes = getWinEHFuncletFrameSize(MF);
1879
1880 // Skip the callee-saved push instructions.
1881 bool PushedRegs = false;
1882 int StackOffset = 2 * stackGrowth;
1883 MachineBasicBlock::iterator LastCSPush = MBBI;
1884 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1885 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1886 return false;
1887 unsigned Opc = MBBI->getOpcode();
1888 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1889 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1890 };
1891
1892 while (IsCSPush(MBBI)) {
1893 PushedRegs = true;
1894 Register Reg = MBBI->getOperand(0).getReg();
1895 LastCSPush = MBBI;
1896 ++MBBI;
1897 unsigned Opc = LastCSPush->getOpcode();
1898
1899 if (!HasFP && NeedsDwarfCFI) {
1900 // Mark callee-saved push instruction.
1901 // Define the current CFA rule to use the provided offset.
1902 assert(StackSize);
1903 // Compared to push, push2 introduces more stack offset (one more
1904 // register).
1905 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1906 StackOffset += stackGrowth;
1908 BuildCFI(MBB, MBBI, DL,
1909 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset), MachineInstr::FrameSetup);
1910 StackOffset += stackGrowth;
1911 }
1912
1913 if (NeedsWinCFI) {
1914 HasWinCFI = true;
1915 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1916 .addImm(Reg)
1917 .setMIFlag(MachineInstr::FrameSetup);
1918 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1919 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1920 .addImm(LastCSPush->getOperand(1).getReg())
1921 .setMIFlag(MachineInstr::FrameSetup);
1922 }
1923 }
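// For illustration (an assumed example, not from the source): a push2 save
// such as
//   push2 %r15, %r14
// stores two registers and moves RSP by 16 bytes at once, which is why the
// CFI code above advances StackOffset by stackGrowth twice for a single
// PUSH2/PUSH2P instruction.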
1924
1925 // Realign stack after we pushed callee-saved registers (so that we'll be
1926 // able to calculate their offsets from the frame pointer).
1927 // Don't do this for Win64; it needs to realign the stack after the prologue.
1928 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1929 !ArgBaseReg.isValid()) {
1930 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1931 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1932
1933 if (NeedsWinCFI) {
1934 HasWinCFI = true;
1935 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1936 .addImm(MaxAlign)
1937 .setMIFlag(MachineInstr::FrameSetup);
1938 }
1939 }
1940
1941 // If there is a SUB32ri of ESP immediately before this instruction, merge
1942 // the two. This can be the case when tail call elimination is enabled and
1943 // the callee has more arguments than the caller.
1944 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1945
1946 // Adjust stack pointer: ESP -= numbytes.
1947
1948 // Windows and cygwin/mingw require a prologue helper routine when allocating
1949 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1950 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1951 // stack and adjust the stack pointer in one go. The 64-bit version of
1952 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1953 // responsible for adjusting the stack pointer. Touching the stack at 4K
1954 // increments is necessary to ensure that the guard pages used by the OS
1955 // virtual memory manager are allocated in correct sequence.
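//
// For illustration (a sketch, not from the source): on x86-64 Windows a
// large allocation typically comes out as
//   movl $NumBytes, %eax
//   callq __chkstk
//   subq %rax, %rsp
// where __chkstk only probes and the prologue adjusts RSP itself, while the
// 32-bit __chkstk/__alloca both probe and move ESP in one call.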
1956 uint64_t AlignedNumBytes = NumBytes;
1957 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1958 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1959 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1960 assert(!X86FI->getUsesRedZone() &&
1961 "The Red Zone is not accounted for in stack probes");
1962
1963 // Check whether EAX is livein for this block.
1964 bool isEAXAlive = isEAXLiveIn(MBB);
1965
1966 if (isEAXAlive) {
1967 if (Is64Bit) {
1968 // Save RAX
1969 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1970 .addReg(X86::RAX, RegState::Kill)
1971 .setMIFlag(MachineInstr::FrameSetup);
1972 } else {
1973 // Save EAX
1974 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1975 .addReg(X86::EAX, RegState::Kill)
1976 .setMIFlag(MachineInstr::FrameSetup);
1977 }
1978 }
1979
1980 if (Is64Bit) {
1981 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1982 // Function prologue is responsible for adjusting the stack pointer.
1983 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1984 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1985 .addImm(Alloc)
1986 .setMIFlag(MachineInstr::FrameSetup);
1987 } else {
1988 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1989 // We'll also use 4 already allocated bytes for EAX.
1990 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1991 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1992 .setMIFlag(MachineInstr::FrameSetup);
1993 }
1994
1995 // Call __chkstk, __chkstk_ms, or __alloca.
1996 emitStackProbe(MF, MBB, MBBI, DL, true);
1997
1998 if (isEAXAlive) {
1999 // Restore RAX/EAX
2000 MachineInstr *MI;
2001 if (Is64Bit)
2002 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2003 StackPtr, false, NumBytes - 8);
2004 else
2005 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2006 StackPtr, false, NumBytes - 4);
2007 MI->setFlag(MachineInstr::FrameSetup);
2008 MBB.insert(MBBI, MI);
2009 }
2010 } else if (NumBytes) {
2011 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2012 }
2013
2014 if (NeedsWinCFI && NumBytes) {
2015 HasWinCFI = true;
2016 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2017 .addImm(NumBytes)
2018 .setMIFlag(MachineInstr::FrameSetup);
2019 }
2020
2021 int SEHFrameOffset = 0;
2022 unsigned SPOrEstablisher;
2023 if (IsFunclet) {
2024 if (IsClrFunclet) {
2025 // The establisher parameter passed to a CLR funclet is actually a pointer
2026 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2027 // to find the root function establisher frame by loading the PSPSym from
2028 // the intermediate frame.
2029 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2030 MachinePointerInfo NoInfo;
2031 MBB.addLiveIn(Establisher);
2032 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2033 Establisher, false, PSPSlotOffset)
2034 .addMemOperand(MF.getMachineMemOperand(
2035 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)))
2036 ;
2037 // Save the root establisher back into the current funclet's (mostly
2038 // empty) frame, in case a sub-funclet or the GC needs it.
2039 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2040 false, PSPSlotOffset)
2041 .addReg(Establisher)
2042 .addMemOperand(MF.getMachineMemOperand(
2043 NoInfo,
2044 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2045 SlotSize, Align(SlotSize)));
2046 }
2047 SPOrEstablisher = Establisher;
2048 } else {
2049 SPOrEstablisher = StackPtr;
2050 }
2051
2052 if (IsWin64Prologue && HasFP) {
2053 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2054 // this calculation on the incoming establisher, which holds the value of
2055 // RSP from the parent frame at the end of the prologue.
2056 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2057 if (SEHFrameOffset)
2058 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2059 SPOrEstablisher, false, SEHFrameOffset);
2060 else
2061 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2062 .addReg(SPOrEstablisher);
2063
2064 // If this is not a funclet, emit the CFI describing our frame pointer.
2065 if (NeedsWinCFI && !IsFunclet) {
2066 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2067 HasWinCFI = true;
2068 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2069 .addImm(FramePtr)
2070 .addImm(SEHFrameOffset)
2071 .setMIFlag(MachineInstr::FrameSetup);
2072 if (isAsynchronousEHPersonality(Personality))
2073 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2074 }
2075 } else if (IsFunclet && STI.is32Bit()) {
2076 // Reset EBP / ESI to something good for funclets.
2077 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2078 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2079 // into the registration node so that the runtime will restore it for us.
2080 if (!MBB.isCleanupFuncletEntry()) {
2081 assert(Personality == EHPersonality::MSVC_CXX);
2082 Register FrameReg;
2083 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2084 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2085 // ESP is the first field, so no extra displacement is needed.
2086 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2087 false, EHRegOffset)
2088 .addReg(X86::ESP);
2089 }
2090 }
2091
2092 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2093 const MachineInstr &FrameInstr = *MBBI;
2094 ++MBBI;
2095
2096 if (NeedsWinCFI) {
2097 int FI;
2098 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2099 if (X86::FR64RegClass.contains(Reg)) {
2100 int Offset;
2101 Register IgnoredFrameReg;
2102 if (IsWin64Prologue && IsFunclet)
2103 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2104 else
2105 Offset =
2106 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2107 SEHFrameOffset;
2108
2109 HasWinCFI = true;
2110 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2111 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2112 .addImm(Reg)
2113 .addImm(Offset)
2114 .setMIFlag(MachineInstr::FrameSetup);
2115 }
2116 }
2117 }
2118 }
2119
2120 if (NeedsWinCFI && HasWinCFI)
2121 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2122 .setMIFlag(MachineInstr::FrameSetup);
2123
2124 if (FnHasClrFunclet && !IsFunclet) {
2125 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2126 // immediately after the prolog) into the PSPSlot so that funclets
2127 // and the GC can recover it.
2128 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2129 auto PSPInfo = MachinePointerInfo::getFixedStack(
2130 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2131 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2132 PSPSlotOffset)
2133 .addReg(StackPtr)
2134 .addMemOperand(MF.getMachineMemOperand(
2135 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2136 SlotSize, Align(SlotSize)));
2137 }
2138
2139 // Realign stack after we spilled callee-saved registers (so that we'll be
2140 // able to calculate their offsets from the frame pointer).
2141 // Win64 requires aligning the stack after the prologue.
2142 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2143 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2144 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2145 }
2146
2147 // We already dealt with stack realignment and funclets above.
2148 if (IsFunclet && STI.is32Bit())
2149 return;
2150
2151 // If we need a base pointer, set it up here. It's whatever the value
2152 // of the stack pointer is at this point. Any variable size objects
2153 // will be allocated after this, so we can still use the base pointer
2154 // to reference locals.
2155 if (TRI->hasBasePointer(MF)) {
2156 // Update the base pointer with the current stack pointer.
2157 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2158 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2159 .addReg(SPOrEstablisher)
2160 .setMIFlag(MachineInstr::FrameSetup);
2161 if (X86FI->getRestoreBasePointer()) {
2162 // Stash value of base pointer. Saving RSP instead of EBP shortens
2163 // dependence chain. Used by SjLj EH.
2164 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2165 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2166 X86FI->getRestoreBasePointerOffset())
2167 .addReg(SPOrEstablisher)
2168 .setMIFlag(MachineInstr::FrameSetup);
2169 }
2170
2171 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2172 // Stash the value of the frame pointer relative to the base pointer for
2173 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2174 // it recovers the frame pointer from the base pointer rather than the
2175 // other way around.
2176 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2177 Register UsedReg;
2178 int Offset =
2179 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2180 .getFixed();
2181 assert(UsedReg == BasePtr);
2182 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2183 .addReg(FramePtr)
2184 .setMIFlag(MachineInstr::FrameSetup);
2185 }
2186 }
2187 if (ArgBaseReg.isValid()) {
2188 // Save argument base pointer.
2189 auto *MI = X86FI->getStackPtrSaveMI();
2190 int FI = MI->getOperand(1).getIndex();
2191 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2192 // movl %basereg, offset(%ebp)
2193 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2194 .addReg(ArgBaseReg)
2195 .setMIFlag(MachineInstr::FrameSetup);
2196 }
2197
2198 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2199 // Mark end of stack pointer adjustment.
2200 if (!HasFP && NumBytes) {
2201 // Define the current CFA rule to use the provided offset.
2202 assert(StackSize);
2203 BuildCFI(
2204 MBB, MBBI, DL,
2205 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2206 MachineInstr::FrameSetup);
2207 }
2208
2209 // Emit DWARF info specifying the offsets of the callee-saved registers.
2210 emitCalleeSavedFrameMoves(MBB, MBBI, DL, /*IsPrologue=*/true);
2211 }
2212
2213 // X86 Interrupt handling function cannot assume anything about the direction
2214 // flag (DF in EFLAGS register). Clear this flag by creating a "cld" instruction
2215 // in the prologue of each interrupt handler function.
2216 //
2217 // Create "cld" instruction only in these cases:
2218 // 1. The interrupt handling function uses any of the "rep" instructions.
2219 // 2. Interrupt handling function calls another function.
2220 // 3. If there are any inline asm blocks, as we do not know what they do
2221 //
2222 // TODO: We should also emit cld if we detect the use of std, but as of now,
2223 // the compiler does not even emit that instruction or even define it, so in
2224 // practice, this would only happen with inline asm, which we cover anyway.
2225 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
2226 bool NeedsCLD = false;
2227
2228 for (const MachineBasicBlock &B : MF) {
2229 for (const MachineInstr &MI : B) {
2230 if (MI.isCall()) {
2231 NeedsCLD = true;
2232 break;
2233 }
2234
2235 if (isOpcodeRep(MI.getOpcode())) {
2236 NeedsCLD = true;
2237 break;
2238 }
2239
2240 if (MI.isInlineAsm()) {
2241 // TODO: Parse asm for rep instructions or call sites?
2242 // For now, let's play it safe and emit a cld instruction
2243 // just in case.
2244 NeedsCLD = true;
2245 break;
2246 }
2247 }
2248 }
2249
2250 if (NeedsCLD) {
2251 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2252 .setMIFlag(MachineInstr::FrameSetup);
2253 }
2254 }
2255
2256 // At this point we know if the function has WinCFI or not.
2257 MF.setHasWinCFI(HasWinCFI);
2258}
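// For illustration (a sketch, not from the source): for a simple x86-64
// function with a frame pointer, one callee-saved push and 24 bytes of
// locals, the prologue built above comes out roughly as
//   pushq %rbp
//   movq  %rsp, %rbp
//   pushq %rbx
//   subq  $24, %rsp
// plus the matching .cfi_* (DWARF) or SEH pseudo (Windows) annotations.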
2259
2260 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2261 const MachineFunction &MF) const {
2262 // We can't use LEA instructions for adjusting the stack pointer if we don't
2263 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2264 // to deallocate the stack.
2265 // This means that we can use LEA for SP in two situations:
2266 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2267 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2268 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2269}
2270
2271 static bool isFuncletReturnInstr(MachineInstr &MI) {
2272 switch (MI.getOpcode()) {
2273 case X86::CATCHRET:
2274 case X86::CLEANUPRET:
2275 return true;
2276 default:
2277 return false;
2278 }
2279 llvm_unreachable("impossible");
2280}
2281
2282// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2283// stack. It holds a pointer to the bottom of the root function frame. The
2284// establisher frame pointer passed to a nested funclet may point to the
2285// (mostly empty) frame of its parent funclet, but it will need to find
2286// the frame of the root function to access locals. To facilitate this,
2287// every funclet copies the pointer to the bottom of the root function
2288// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2289// same offset for the PSPSym in the root function frame that's used in the
2290// funclets' frames allows each funclet to dynamically accept any ancestor
2291// frame as its establisher argument (the runtime doesn't guarantee the
2292// immediate parent for some reason lost to history), and also allows the GC,
2293// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2294// frame with only a single offset reported for the entire method.
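// For illustration (a sketch, not from the source), the resulting layout is:
//   root frame:    [ locals ... | PSPSym | outgoing args ] <- Initial-SP
//   funclet frame: [ (mostly empty) | PSPSym | outgoing args ]
// with the PSPSym at the same SP-relative offset in both, always holding the
// root function's Initial-SP.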
2295unsigned
2296X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2297 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2298 Register SPReg;
2299 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2300 /*IgnoreSPUpdates*/ true)
2301 .getFixed();
2302 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2303 return static_cast<unsigned>(Offset);
2304}
2305
2306unsigned
2307X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2308 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2309 // This is the size of the pushed CSRs.
2310 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2311 // This is the size of callee saved XMMs.
2312 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2313 unsigned XMMSize =
2314 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2315 // This is the amount of stack a funclet needs to allocate.
2316 unsigned UsedSize;
2317 EHPersonality Personality =
2318 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2319 if (Personality == EHPersonality::CoreCLR) {
2320 // CLR funclets need to hold enough space to include the PSPSym, at the
2321 // same offset from the stack pointer (immediately after the prolog) as it
2322 // resides at in the main function.
2323 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2324 } else {
2325 // Other funclets just need enough stack for outgoing call arguments.
2326 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2327 }
2328 // RBP is not included in the callee saved register block. After pushing RBP,
2329 // everything is 16 byte aligned. Everything we allocate before an outgoing
2330 // call must also be 16 byte aligned.
2331 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2332 // Subtract out the size of the callee saved registers. This is how much stack
2333 // each funclet will allocate.
2334 return FrameSizeMinusRBP + XMMSize - CSSize;
2335}
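// A worked example (illustrative): with CSSize = 16, UsedSize = 40 and a
// 16-byte stack alignment, alignTo(16 + 40, 16) = 64, so a funclet with no
// XMM CSRs allocates 64 + 0 - 16 = 48 bytes on top of its pushed registers.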
2336
2337static bool isTailCallOpcode(unsigned Opc) {
2338 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2339 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2340 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2341}
2342
2343 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2344 MachineBasicBlock &MBB) const {
2345 const MachineFrameInfo &MFI = MF.getFrameInfo();
2346 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2347 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2348 MachineBasicBlock::iterator MBBI = Terminator;
2349 DebugLoc DL;
2350 if (MBBI != MBB.end())
2351 DL = MBBI->getDebugLoc();
2352 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2353 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2354 Register FramePtr = TRI->getFrameRegister(MF);
2355 Register MachineFramePtr =
2356 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2357
2358 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2359 bool NeedsWin64CFI =
2360 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2361 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2362
2363 // Get the number of bytes to allocate from the FrameInfo.
2364 uint64_t StackSize = MFI.getStackSize();
2365 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2366 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2367 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2368 bool HasFP = hasFP(MF);
2369 uint64_t NumBytes = 0;
2370
2371 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2372 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2373 MF.needsFrameMoves();
2374
2375 Register ArgBaseReg;
2376 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2377 unsigned Opc = X86::LEA32r;
2378 Register StackReg = X86::ESP;
2379 ArgBaseReg = MI->getOperand(0).getReg();
2380 if (STI.is64Bit()) {
2381 Opc = X86::LEA64r;
2382 StackReg = X86::RSP;
2383 }
2384 // leal -4(%basereg), %esp
2385 // .cfi_def_cfa %esp, 4
2386 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2387 .addUse(ArgBaseReg)
2388 .addImm(1)
2389 .addUse(X86::NoRegister)
2390 .addImm(-(int64_t)SlotSize)
2391 .addUse(X86::NoRegister)
2392 .setMIFlag(MachineInstr::FrameDestroy);
2393 if (NeedsDwarfCFI) {
2394 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2395 BuildCFI(MBB, MBBI, DL,
2396 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2397 MachineInstr::FrameDestroy);
2398 --MBBI;
2399 }
2400 --MBBI;
2401 }
2402
2403 if (IsFunclet) {
2404 assert(HasFP && "EH funclets without FP not yet implemented");
2405 NumBytes = getWinEHFuncletFrameSize(MF);
2406 } else if (HasFP) {
2407 // Calculate required stack adjustment.
2408 uint64_t FrameSize = StackSize - SlotSize;
2409 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2410
2411 // Callee-saved registers were pushed on stack before the stack was
2412 // realigned.
2413 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2414 NumBytes = alignTo(FrameSize, MaxAlign);
2415 } else {
2416 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2417 }
2418 uint64_t SEHStackAllocAmt = NumBytes;
2419
2420 // AfterPop is the position to insert .cfi_restore.
2421 MachineBasicBlock::iterator AfterPop = MBBI;
2422 if (HasFP) {
2423 if (X86FI->hasSwiftAsyncContext()) {
2424 // Discard the context.
2425 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2426 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2427 }
2428 // Pop EBP.
2429 BuildMI(MBB, MBBI, DL,
2430 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2431 MachineFramePtr)
2432 .setMIFlag(MachineInstr::FrameDestroy);
2433
2434 // We need to reset FP to its untagged state on return. Bit 60 is currently
2435 // used to show the presence of an extended frame.
2436 if (X86FI->hasSwiftAsyncContext()) {
2437 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2438 .addUse(MachineFramePtr)
2439 .addImm(60)
2440 .setMIFlag(MachineInstr::FrameDestroy);
2441 }
2442
2443 if (NeedsDwarfCFI) {
2444 if (!ArgBaseReg.isValid()) {
2445 unsigned DwarfStackPtr =
2446 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2447 BuildCFI(MBB, MBBI, DL,
2448 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2449 MachineInstr::FrameDestroy);
2450 }
2451 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2452 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2453 BuildCFI(MBB, AfterPop, DL,
2454 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2455 MachineInstr::FrameDestroy);
2456 --MBBI;
2457 --AfterPop;
2458 }
2459 --MBBI;
2460 }
2461 }
2462
2463 MachineBasicBlock::iterator FirstCSPop = MBBI;
2464 // Skip the callee-saved pop instructions.
2465 while (MBBI != MBB.begin()) {
2466 MachineBasicBlock::iterator PI = std::prev(MBBI);
2467 unsigned Opc = PI->getOpcode();
2468
2469 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2470 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2471 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2472 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2473 Opc != X86::POP2P && Opc != X86::LEA64r))
2474 break;
2475 FirstCSPop = PI;
2476 }
2477
2478 --MBBI;
2479 }
2480 if (ArgBaseReg.isValid()) {
2481 // Restore argument base pointer.
2482 auto *MI = X86FI->getStackPtrSaveMI();
2483 int FI = MI->getOperand(1).getIndex();
2484 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2485 // movl offset(%ebp), %basereg
2486 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2487 .setMIFlag(MachineInstr::FrameDestroy);
2488 }
2489 MBBI = FirstCSPop;
2490
2491 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2492 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2493
2494 if (MBBI != MBB.end())
2495 DL = MBBI->getDebugLoc();
2496 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2497 // instruction, merge the two instructions.
2498 if (NumBytes || MFI.hasVarSizedObjects())
2499 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2500
2501 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2502 // slot before popping them off! The same applies when the stack was
2503 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2504 // will not do realignment or dynamic stack allocation.
2505 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2506 !IsFunclet) {
2507 if (TRI->hasStackRealignment(MF))
2508 MBBI = FirstCSPop;
2509 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2510 uint64_t LEAAmount =
2511 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2512
2513 if (X86FI->hasSwiftAsyncContext())
2514 LEAAmount -= 16;
2515
2516 // There are only two legal forms of epilogue:
2517 // - add SEHAllocationSize, %rsp
2518 // - lea SEHAllocationSize(%FramePtr), %rsp
2519 //
2520 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2521 // However, we may use this sequence if we have a frame pointer because the
2522 // effects of the prologue can safely be undone.
2523 if (LEAAmount != 0) {
2524 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2525 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2526 false, LEAAmount);
2527 --MBBI;
2528 } else {
2529 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2530 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2531 --MBBI;
2532 }
2533 } else if (NumBytes) {
2534 // Adjust stack pointer back: ESP += numbytes.
2535 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2536 if (!HasFP && NeedsDwarfCFI) {
2537 // Define the current CFA rule to use the provided offset.
2538 BuildCFI(MBB, MBBI, DL,
2539 MCCFIInstruction::cfiDefCfaOffset(
2540 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2541 MachineInstr::FrameDestroy);
2542 }
2543 --MBBI;
2544 }
2545
2546 // Windows unwinder will not invoke function's exception handler if IP is
2547 // either in prologue or in epilogue. This behavior causes a problem when a
2548 // call immediately precedes an epilogue, because the return address points
2549 // into the epilogue. To cope with that, we insert an epilogue marker here,
2550 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2551 // final emitted code.
2552 if (NeedsWin64CFI && MF.hasWinCFI())
2553 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2554
2555 if (!HasFP && NeedsDwarfCFI) {
2556 MBBI = FirstCSPop;
2557 int64_t Offset = -CSSize - SlotSize;
2558 // Mark callee-saved pop instruction.
2559 // Define the current CFA rule to use the provided offset.
2560 while (MBBI != MBB.end()) {
2561 MachineBasicBlock::iterator PI = MBBI;
2562 unsigned Opc = PI->getOpcode();
2563 ++MBBI;
2564 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2565 Opc == X86::POP2 || Opc == X86::POP2P) {
2566 Offset += SlotSize;
2567 // Compared to pop, pop2 introduces more stack offset (one more
2568 // register).
2569 if (Opc == X86::POP2 || Opc == X86::POP2P)
2570 Offset += SlotSize;
2571 BuildCFI(MBB, MBBI, DL,
2572 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2573 MachineInstr::FrameDestroy);
2574 }
2575 }
2576 }
2577
2578 // Emit DWARF info specifying the restores of the callee-saved registers.
2579 // For epilogue with return inside or being other block without successor,
2580 // no need to generate .cfi_restore for callee-saved registers.
2581 if (NeedsDwarfCFI && !MBB.succ_empty())
2582 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2583
2584 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2585 // Add the return addr area delta back since we are not tail calling.
2586 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2587 assert(Offset >= 0 && "TCDelta should never be positive");
2588 if (Offset) {
2589 // Check for possible merge with preceding ADD instruction.
2590 Offset += mergeSPUpdates(MBB, Terminator, true);
2591 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2592 }
2593 }
2594
2595 // Emit tilerelease for AMX kernel.
2596 if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
2597 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2598}
2599
2600 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2601 int FI,
2602 Register &FrameReg) const {
2603 const MachineFrameInfo &MFI = MF.getFrameInfo();
2604
2605 bool IsFixed = MFI.isFixedObjectIndex(FI);
2606 // We can't calculate offset from frame pointer if the stack is realigned,
2607 // so enforce usage of stack/base pointer. The base pointer is used when we
2608 // have dynamic allocas in addition to dynamic realignment.
2609 if (TRI->hasBasePointer(MF))
2610 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2611 else if (TRI->hasStackRealignment(MF))
2612 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2613 else
2614 FrameReg = TRI->getFrameRegister(MF);
2615
2616 // Offset will hold the offset from the stack pointer at function entry to the
2617 // object.
2618 // We need to factor in additional offsets applied during the prologue to the
2619 // frame, base, and stack pointer depending on which is used.
2620 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2621 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2622 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2623 uint64_t StackSize = MFI.getStackSize();
2624 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2625 int64_t FPDelta = 0;
2626
2627 // In an x86 interrupt, remove the offset we added to account for the return
2628 // address from any stack object allocated in the caller's frame. Interrupts
2629 // do not have a standard return address. Fixed objects in the current frame,
2630 // such as SSE register spills, should not get this treatment.
2631 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR && !IsFixed &&
2632 Offset >= 0) {
2633 Offset += getOffsetOfLocalArea();
2634 }
2635
2636 if (IsWin64Prologue) {
2637 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2638
2639 // Calculate required stack adjustment.
2640 uint64_t FrameSize = StackSize - SlotSize;
2641 // If required, include space for extra hidden slot for stashing base
2642 // pointer.
2643 if (X86FI->getRestoreBasePointer())
2644 FrameSize += SlotSize;
2645 uint64_t NumBytes = FrameSize - CSSize;
2646
2647 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2648 if (FI && FI == X86FI->getFAIndex())
2649 return StackOffset::getFixed(-SEHFrameOffset);
2650
2651 // FPDelta is the offset from the "traditional" FP location of the old base
2652 // pointer followed by return address and the location required by the
2653 // restricted Win64 prologue.
2654 // Add FPDelta to all offsets below that go through the frame pointer.
2655 FPDelta = FrameSize - SEHFrameOffset;
2656 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2657 "FPDelta isn't aligned per the Win64 ABI!");
2658 }
2659
2660 if (FrameReg == TRI->getFramePtr()) {
2661 // Skip saved EBP/RBP
2662 Offset += SlotSize;
2663
2664 // Account for restricted Windows prologue.
2665 Offset += FPDelta;
2666
2667 // Skip the RETADDR move area
2668 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2669 if (TailCallReturnAddrDelta < 0)
2670 Offset -= TailCallReturnAddrDelta;
2671
2672 return StackOffset::getFixed(Offset);
2673 }
2674
2675 // FrameReg is either the stack pointer or a base pointer. But the base is
2676 // located at the end of the statically known StackSize so the distinction
2677 // doesn't really matter.
2678 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2679 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2680 return StackOffset::getFixed(Offset + StackSize);
2681}
2682
2683 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2684 Register &FrameReg) const {
2685 const MachineFrameInfo &MFI = MF.getFrameInfo();
2686 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2687 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2688 const auto it = WinEHXMMSlotInfo.find(FI);
2689
2690 if (it == WinEHXMMSlotInfo.end())
2691 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2692
2693 FrameReg = TRI->getStackRegister();
2694 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2695 it->second;
2696}
2697
2698 StackOffset
2699 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2700 Register &FrameReg,
2701 int Adjustment) const {
2702 const MachineFrameInfo &MFI = MF.getFrameInfo();
2703 FrameReg = TRI->getStackRegister();
2704 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2705 getOffsetOfLocalArea() + Adjustment);
2706}
2707
2708 StackOffset
2709 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2710 int FI, Register &FrameReg,
2711 bool IgnoreSPUpdates) const {
2712
2713 const MachineFrameInfo &MFI = MF.getFrameInfo();
2714 // Does not include any dynamic realign.
2715 const uint64_t StackSize = MFI.getStackSize();
2716 // LLVM arranges the stack as follows:
2717 // ...
2718 // ARG2
2719 // ARG1
2720 // RETADDR
2721 // PUSH RBP <-- RBP points here
2722 // PUSH CSRs
2723 // ~~~~~~~ <-- possible stack realignment (non-win64)
2724 // ...
2725 // STACK OBJECTS
2726 // ... <-- RSP after prologue points here
2727 // ~~~~~~~ <-- possible stack realignment (win64)
2728 //
2729 // if (hasVarSizedObjects()):
2730 // ... <-- "base pointer" (ESI/RBX) points here
2731 // DYNAMIC ALLOCAS
2732 // ... <-- RSP points here
2733 //
2734 // Case 1: In the simple case of no stack realignment and no dynamic
2735 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2736 // with fixed offsets from RSP.
2737 //
2738 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2739 // stack objects are addressed with RBP and regular stack objects with RSP.
2740 //
2741 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2742 // to address stack arguments for outgoing calls and nothing else. The "base
2743 // pointer" points to local variables, and RBP points to fixed objects.
2744 //
2745 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2746 // answer we give is relative to the SP after the prologue, and not the
2747 // SP in the middle of the function.
2748
2749 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2750 !STI.isTargetWin64())
2751 return getFrameIndexReference(MF, FI, FrameReg);
2752
2753 // If !hasReservedCallFrame the function might have SP adjustment in the
2754 // body. So, even though the offset is statically known, it depends on where
2755 // we are in the function.
2756 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2757 return getFrameIndexReference(MF, FI, FrameReg);
2758
2759 // We don't handle tail calls, and shouldn't be seeing them either.
2760 assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2761 "we don't handle this case!");
2762
2763 // This is how the math works out:
2764 //
2765 // %rsp grows (i.e. gets lower) left to right. Each box below is
2766 // one word (eight bytes). Obj0 is the stack slot we're trying to
2767 // get to.
2768 //
2769 // ----------------------------------
2770 // | BP | Obj0 | Obj1 | ... | ObjN |
2771 // ----------------------------------
2772 // ^ ^ ^ ^
2773 // A B C E
2774 //
2775 // A is the incoming stack pointer.
2776 // (B - A) is the local area offset (-8 for x86-64) [1]
2777 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2778 //
2779 // |(E - B)| is the StackSize (absolute value, positive). For a
2780 // stack that grows down, this works out to be (B - E). [3]
2781 //
2782 // E is also the value of %rsp after stack has been set up, and we
2783 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2784 // (C - E) == (C - A) - (B - A) + (B - E)
2785 // { Using [1], [2] and [3] above }
2786 // == getObjectOffset - LocalAreaOffset + StackSize
2787
2788 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2789}
2790
2791 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2792 MachineFunction &MF, const TargetRegisterInfo *TRI,
2793 std::vector<CalleeSavedInfo> &CSI) const {
2794 MachineFrameInfo &MFI = MF.getFrameInfo();
2795 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2796
2797 unsigned CalleeSavedFrameSize = 0;
2798 unsigned XMMCalleeSavedFrameSize = 0;
2799 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2800 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2801
2802 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2803
2804 if (TailCallReturnAddrDelta < 0) {
2805 // create RETURNADDR area
2806 // arg
2807 // arg
2808 // RETADDR
2809 // { ...
2810 // RETADDR area
2811 // ...
2812 // }
2813 // [EBP]
2814 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2815 TailCallReturnAddrDelta - SlotSize, true);
2816 }
2817
2818 // Spill the BasePtr if it's used.
2819 if (this->TRI->hasBasePointer(MF)) {
2820 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2821 if (MF.hasEHFunclets()) {
2822 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2823 X86FI->setHasSEHFramePtrSave(true);
2824 X86FI->setSEHFramePtrSaveIndex(FI);
2825 }
2826 }
2827
2828 if (hasFP(MF)) {
2829 // emitPrologue always spills frame register the first thing.
2830 SpillSlotOffset -= SlotSize;
2831 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2832
2833 // The async context lives directly before the frame pointer, and we
2834 // allocate a second slot to preserve stack alignment.
2835 if (X86FI->hasSwiftAsyncContext()) {
2836 SpillSlotOffset -= SlotSize;
2837 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2838 SpillSlotOffset -= SlotSize;
2839 }
2840
2841 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2842 // the frame register, we can delete it from CSI list and not have to worry
2843 // about avoiding it later.
2844 Register FPReg = TRI->getFrameRegister(MF);
2845 for (unsigned i = 0; i < CSI.size(); ++i) {
2846 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2847 CSI.erase(CSI.begin() + i);
2848 break;
2849 }
2850 }
2851 }
2852
2853 // Strategy:
2854 // 1. Use push2 when
2855 //       a) the number of CSRs > 1, if no padding is needed
2856 //       b) the number of CSRs > 2, if padding is needed
2857 // 2. When the number of CSR pushes is odd
2858 //    a. Start using push2 from the 1st push if the stack is 16B aligned.
2859 //    b. Start using push2 from the 2nd push if the stack is not 16B aligned.
2860 // 3. When the number of CSR pushes is even, start using push2 from the 1st
2861 //    push and make the stack 16B aligned before the push.
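// A worked example (illustrative, not from the source): with three GPR CSRs
// and a 16B-aligned offset at the first push, odd case 2a applies and the
// saves become
//   push2 CSR1, CSR2
//   push  CSR3
// With an even number of CSRs and a misaligned offset, a padding slot is
// created first so that every push2 lands on a 16B-aligned boundary.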
2862 unsigned NumRegsForPush2 = 0;
2863 if (STI.hasPush2Pop2()) {
2864 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2865 return X86::GR64RegClass.contains(I.getReg());
2866 });
2867 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2868 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2869 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2870 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2871 if (X86FI->padForPush2Pop2()) {
2872 SpillSlotOffset -= SlotSize;
2873 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2874 }
2875 }
2876
2877 // Assign slots for GPRs. It increases frame size.
2878 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2879 Register Reg = I.getReg();
2880
2881 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2882 continue;
2883
2884 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2885 // or when there is currently an odd number of registers in the candidates.
2886 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2887 (SpillSlotOffset % 16 == 0 ||
2888 X86FI->getNumCandidatesForPush2Pop2() % 2))
2889 X86FI->addCandidateForPush2Pop2(Reg);
2890
2891 SpillSlotOffset -= SlotSize;
2892 CalleeSavedFrameSize += SlotSize;
2893
2894 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2895 I.setFrameIdx(SlotIndex);
2896 }
2897
2898 // Adjust the offset of the spill slot now that we know the accurate
2899 // callee-saved frame size.
2900 if (X86FI->getRestoreBasePointer()) {
2901 SpillSlotOffset -= SlotSize;
2902 CalleeSavedFrameSize += SlotSize;
2903
2904 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2905 // TODO: saving the slot index is better?
2906 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2907 }
2908 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2909 "Expect even candidates for push2/pop2");
2910 if (X86FI->getNumCandidatesForPush2Pop2())
2911 ++NumFunctionUsingPush2Pop2;
2912 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2913 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2914
2915 // Assign slots for XMMs.
2916 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2917 Register Reg = I.getReg();
2918 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2919 continue;
2920
2921 // If this is k-register make sure we lookup via the largest legal type.
2922 MVT VT = MVT::Other;
2923 if (X86::VK16RegClass.contains(Reg))
2924 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2925
2926 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2927 unsigned Size = TRI->getSpillSize(*RC);
2928 Align Alignment = TRI->getSpillAlign(*RC);
2929 // ensure alignment
2930 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2931 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2932
2933 // spill into slot
2934 SpillSlotOffset -= Size;
2935 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2936 I.setFrameIdx(SlotIndex);
2937 MFI.ensureMaxAlignment(Alignment);
2938
2939 // Save the start offset and size of XMM in stack frame for funclets.
2940 if (X86::VR128RegClass.contains(Reg)) {
2941 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2942 XMMCalleeSavedFrameSize += Size;
2943 }
2944 }
2945
2946 return true;
2947}
2948
2949 bool X86FrameLowering::spillCalleeSavedRegisters(
2950 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2951 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2952 DebugLoc DL = MBB.findDebugLoc(MI);
2953
2954 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2955 // for us, and there are no XMM CSRs on Win32.
2956 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2957 return true;
2958
2959 // Push GPRs. It increases frame size.
2960 const MachineFunction &MF = *MBB.getParent();
2961 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2962 if (X86FI->padForPush2Pop2())
2963 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
2964
2965 // Update LiveIn of the basic block and decide whether we can add a kill flag
2966 // to the use.
2967 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
2968 const MachineRegisterInfo &MRI = MF.getRegInfo();
2969 // Do not set a kill flag on values that are also marked as live-in. This
2970 // happens with the @llvm.returnaddress intrinsic and with arguments
2971 // passed in callee saved registers.
2972 // Omitting the kill flags is conservatively correct even if the live-in
2973 // is not used after all.
2974 if (MRI.isLiveIn(Reg))
2975 return false;
2976 MBB.addLiveIn(Reg);
2977 // Check if any subregister is live-in
2978 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
2979 if (MRI.isLiveIn(*AReg))
2980 return false;
2981 return true;
2982 };
2983 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
2984 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
2985 };
2986
2987 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
2988 Register Reg = RI->getReg();
2989 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2990 continue;
2991
2992 if (X86FI->isCandidateForPush2Pop2(Reg)) {
2993 Register Reg2 = (++RI)->getReg();
2994 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
2995 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
2996 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
2997 .setMIFlag(MachineInstr::FrameSetup);
2998 } else {
2999 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3000 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3001 .setMIFlag(MachineInstr::FrameSetup);
3002 }
3003 }
3004
3005 if (X86FI->getRestoreBasePointer()) {
3006 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3007 Register BaseReg = this->TRI->getBaseRegister();
3008 BuildMI(MBB, MI, DL, TII.get(Opc))
3009 .addReg(BaseReg, getKillRegState(true))
3010 .setMIFlag(MachineInstr::FrameSetup);
3011 }
3012
3013 // Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
3014 // so they are stored to their stack-frame slots instead.
3015 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3016 Register Reg = I.getReg();
3017 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3018 continue;
3019
3020 // If this is k-register make sure we lookup via the largest legal type.
3021 MVT VT = MVT::Other;
3022 if (X86::VK16RegClass.contains(Reg))
3023 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3024
3025 // Add the callee-saved register as live-in. It's killed at the spill.
3026 MBB.addLiveIn(Reg);
3027 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3028
3029 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3030 Register());
3031 --MI;
3032 MI->setFlag(MachineInstr::FrameSetup);
3033 ++MI;
3034 }
3035
3036 return true;
3037}
3038
3039 void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3040 MachineBasicBlock::iterator MBBI,
3041 MachineInstr *CatchRet) const {
3042 // SEH shouldn't use catchret.
3043 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3044 MBB.getParent()->getFunction().getPersonalityFn())) &&
3045 "SEH should not use CATCHRET");
3046 const DebugLoc &DL = CatchRet->getDebugLoc();
3047 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3048
3049 // Fill EAX/RAX with the address of the target block.
3050 if (STI.is64Bit()) {
3051 // LEA64r CatchRetTarget(%rip), %rax
3052 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3053 .addReg(X86::RIP)
3054 .addImm(0)
3055 .addReg(0)
3056 .addMBB(CatchRetTarget)
3057 .addReg(0);
3058 } else {
3059 // MOV32ri $CatchRetTarget, %eax
3060 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3061 .addMBB(CatchRetTarget);
3062 }
3063
3064 // Record that we've taken the address of CatchRetTarget and no longer just
3065 // reference it in a terminator.
3066 CatchRetTarget->setMachineBlockAddressTaken();
3067}
3068
3069 bool X86FrameLowering::restoreCalleeSavedRegisters(
3070 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3071 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3072 if (CSI.empty())
3073 return false;
3074
3075 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3076 // Don't restore CSRs in 32-bit EH funclets. Matches
3077 // spillCalleeSavedRegisters.
3078 if (STI.is32Bit())
3079 return true;
3080 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3081 // funclets. emitEpilogue transforms these to normal jumps.
3082 if (MI->getOpcode() == X86::CATCHRET) {
3083 const Function &F = MBB.getParent()->getFunction();
3084 bool IsSEH = isAsynchronousEHPersonality(
3085 classifyEHPersonality(F.getPersonalityFn()));
3086 if (IsSEH)
3087 return true;
3088 }
3089 }
3090
3091 DebugLoc DL = MBB.findDebugLoc(MI);
3092
3093 // Reload XMMs from stack frame.
3094 for (const CalleeSavedInfo &I : CSI) {
3095 Register Reg = I.getReg();
3096 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3097 continue;
3098
3099 // If this is k-register make sure we lookup via the largest legal type.
3100 MVT VT = MVT::Other;
3101 if (X86::VK16RegClass.contains(Reg))
3102 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3103
3104 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3105 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3106 Register());
3107 }
3108
3109 // Clear the stack slot for spill base pointer register.
3110 MachineFunction &MF = *MBB.getParent();
3111 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3112 if (X86FI->getRestoreBasePointer()) {
3113 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3114 Register BaseReg = this->TRI->getBaseRegister();
3115 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3116 .setMIFlag(MachineInstr::FrameDestroy);
3117 }
3118
3119 // POP GPRs.
3120 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3121 Register Reg = I->getReg();
3122 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3123 continue;
3124
3125 if (X86FI->isCandidateForPush2Pop2(Reg))
3126 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3127 .addReg((++I)->getReg(), RegState::Define)
3128 .setMIFlag(MachineInstr::FrameDestroy);
3129 else
3130 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3131 .setMIFlag(MachineInstr::FrameDestroy);
3132 }
3133 if (X86FI->padForPush2Pop2())
3134 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3135
3136 return true;
3137}
3138
3139 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3140 BitVector &SavedRegs,
3141 RegScavenger *RS) const {
3142 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3143
3144 // Spill the BasePtr if it's used.
3145 if (TRI->hasBasePointer(MF)) {
3146 Register BasePtr = TRI->getBaseRegister();
3147 if (STI.isTarget64BitILP32())
3148 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3149 SavedRegs.set(BasePtr);
3150 }
3151}
3152
3153static bool HasNestArgument(const MachineFunction *MF) {
3154 const Function &F = MF->getFunction();
3155 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3156 I++) {
3157 if (I->hasNestAttr() && !I->use_empty())
3158 return true;
3159 }
3160 return false;
3161}
3162
3163/// GetScratchRegister - Get a temp register for performing work in the
3164/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3165/// and the properties of the function either one or two registers will be
3166/// needed. Set primary to true for the first register, false for the second.
3167static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3168 const MachineFunction &MF, bool Primary) {
3169 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3170
3171 // Erlang stuff.
3172 if (CallingConvention == CallingConv::HiPE) {
3173 if (Is64Bit)
3174 return Primary ? X86::R14 : X86::R13;
3175 else
3176 return Primary ? X86::EBX : X86::EDI;
3177 }
3178
3179 if (Is64Bit) {
3180 if (IsLP64)
3181 return Primary ? X86::R11 : X86::R12;
3182 else
3183 return Primary ? X86::R11D : X86::R12D;
3184 }
3185
3186 bool IsNested = HasNestArgument(&MF);
3187
3191 if (IsNested)
3192 report_fatal_error("Segmented stacks does not support fastcall with "
3193 "nested function.");
3194 return Primary ? X86::EAX : X86::ECX;
3195 }
3196 if (IsNested)
3197 return Primary ? X86::EDX : X86::EAX;
3198 return Primary ? X86::ECX : X86::EAX;
3199}
3200
3201// The stack limit in the TCB is set to this many bytes above the actual stack
3202// limit.
3203 static const uint64_t kSplitStackAvailable = 256;
3204
3205 void X86FrameLowering::adjustForSegmentedStacks(
3206 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3207 MachineFrameInfo &MFI = MF.getFrameInfo();
3208 uint64_t StackSize;
3209 unsigned TlsReg, TlsOffset;
3210 DebugLoc DL;
3211
3212 // To support shrink-wrapping we would need to insert the new blocks
3213 // at the right place and update the branches to PrologueMBB.
3214 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3215
3216 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3217 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3218 "Scratch register is live-in");
3219
3220 if (MF.getFunction().isVarArg())
3221 report_fatal_error("Segmented stacks do not support vararg functions.");
3222 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3223 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3224 !STI.isTargetDragonFly())
3225 report_fatal_error("Segmented stacks not supported on this platform.");
3226
3227 // Eventually StackSize will be calculated by a link-time pass, which will
3228 // also decide whether checking code needs to be injected into this particular
3229 // prologue.
3230 StackSize = MFI.getStackSize();
3231
3232 if (!MFI.needsSplitStackProlog())
3233 return;
3234
3235 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3236 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3237 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3238 bool IsNested = false;
3239
3240 // We need to know if the function has a nest argument only in 64 bit mode.
3241 if (Is64Bit)
3242 IsNested = HasNestArgument(&MF);
3243
3244 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3245 // allocMBB needs to be the last (terminating) instruction.
3246
3247 for (const auto &LI : PrologueMBB.liveins()) {
3248 allocMBB->addLiveIn(LI);
3249 checkMBB->addLiveIn(LI);
3250 }
3251
3252 if (IsNested)
3253 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3254
3255 MF.push_front(allocMBB);
3256 MF.push_front(checkMBB);
3257
3258 // When the frame size is less than 256 we just compare the stack
3259 // boundary directly to the value of the stack pointer, per gcc.
3260 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3261
3262 // Read the limit of the current stacklet from the stack_guard location.
3263 if (Is64Bit) {
3264 if (STI.isTargetLinux()) {
3265 TlsReg = X86::FS;
3266 TlsOffset = IsLP64 ? 0x70 : 0x40;
3267 } else if (STI.isTargetDarwin()) {
3268 TlsReg = X86::GS;
3269 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3270 } else if (STI.isTargetWin64()) {
3271 TlsReg = X86::GS;
3272 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3273 } else if (STI.isTargetFreeBSD()) {
3274 TlsReg = X86::FS;
3275 TlsOffset = 0x18;
3276 } else if (STI.isTargetDragonFly()) {
3277 TlsReg = X86::FS;
3278 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3279 } else {
3280 report_fatal_error("Segmented stacks not supported on this platform.");
3281 }
3282
3283 if (CompareStackPointer)
3284 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3285 else
3286 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3287 ScratchReg)
3288 .addReg(X86::RSP)
3289 .addImm(1)
3290 .addReg(0)
3291 .addImm(-StackSize)
3292 .addReg(0);
3293
3294 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3295 .addReg(ScratchReg)
3296 .addReg(0)
3297 .addImm(1)
3298 .addReg(0)
3299 .addImm(TlsOffset)
3300 .addReg(TlsReg);
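// For illustration (Linux x86-64, LP64; a sketch, not from the source):
// for a small frame this compares RSP directly against the stacklet limit,
//   cmpq %fs:0x70, %rsp
// and for a larger frame it first forms RSP - StackSize in a scratch reg:
//   leaq -StackSize(%rsp), %r11
//   cmpq %fs:0x70, %r11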
3301 } else {
3302 if (STI.isTargetLinux()) {
3303 TlsReg = X86::GS;
3304 TlsOffset = 0x30;
3305 } else if (STI.isTargetDarwin()) {
3306 TlsReg = X86::GS;
3307 TlsOffset = 0x48 + 90 * 4;
3308 } else if (STI.isTargetWin32()) {
3309 TlsReg = X86::FS;
3310 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3311 } else if (STI.isTargetDragonFly()) {
3312 TlsReg = X86::FS;
3313 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3314 } else if (STI.isTargetFreeBSD()) {
3315 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3316 } else {
3317 report_fatal_error("Segmented stacks not supported on this platform.");
3318 }
3319
3320 if (CompareStackPointer)
3321 ScratchReg = X86::ESP;
3322 else
3323 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3324 .addReg(X86::ESP)
3325 .addImm(1)
3326 .addReg(0)
3327 .addImm(-StackSize)
3328 .addReg(0);
3329
3332 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3333 .addReg(ScratchReg)
3334 .addReg(0)
3335 .addImm(0)
3336 .addReg(0)
3337 .addImm(TlsOffset)
3338 .addReg(TlsReg);
3339 } else if (STI.isTargetDarwin()) {
3340
3341 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3342 unsigned ScratchReg2;
3343 bool SaveScratch2;
3344 if (CompareStackPointer) {
3345 // The primary scratch register is available for holding the TLS offset.
3346 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3347 SaveScratch2 = false;
3348 } else {
3349 // Need to use a second register to hold the TLS offset
3350 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3351
3352 // Unfortunately, with fastcc the second scratch register may hold an
3353 // argument.
3354 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3355 }
3356
3357 // If Scratch2 is live-in then it needs to be saved.
3358 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3359 "Scratch register is live-in and not saved");
3360
3361 if (SaveScratch2)
3362 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3363 .addReg(ScratchReg2, RegState::Kill);
3364
3365 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3366 .addImm(TlsOffset);
3367 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3368 .addReg(ScratchReg)
3369 .addReg(ScratchReg2)
3370 .addImm(1)
3371 .addReg(0)
3372 .addImm(0)
3373 .addReg(TlsReg);
3374
3375 if (SaveScratch2)
3376 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3377 }
3378 }
3379
3380 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3381 // It jumps to normal execution of the function body.
3382 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3383 .addMBB(&PrologueMBB)
3384 .addImm(X86::COND_AE);
3385
3386 // On 32 bit we first push the argument size and then the frame size. On 64
3387 // bit, we pass the stack frame size in r10 and the argument size in r11.
3388 if (Is64Bit) {
3389 // Functions with nested arguments use R10, so it needs to be saved across
3390 // the call to _morestack
3391
3392 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3393 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3394 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3395 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3396
3397 if (IsNested)
3398 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3399
3400 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3401 .addImm(StackSize);
3402 BuildMI(allocMBB, DL,
3403 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3404 Reg11)
3405 .addImm(X86FI->getArgumentStackSize());
3406 } else {
3407 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3408 .addImm(X86FI->getArgumentStackSize());
3409 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3410 }
3411
3412 // __morestack is in libgcc
3413 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3414 // Under the large code model, we cannot assume that __morestack lives
3415 // within 2^31 bytes of the call site, so we cannot use pc-relative
3416 // addressing. We cannot perform the call via a temporary register,
3417 // as the rax register may be used to store the static chain, and all
3418 // other suitable registers may be either callee-save or used for
3419 // parameter passing. We cannot use the stack at this point either
3420 // because __morestack manipulates the stack directly.
3421 //
3422 // To avoid these issues, perform an indirect call via a read-only memory
3423 // location containing the address.
3424 //
3425 // This solution is not perfect, as it assumes that the .rodata section
3426 // is laid out within 2^31 bytes of each function body, but this seems
3427 // to be sufficient for JIT.
3428 // FIXME: Add retpoline support and remove the error here.
3429 if (STI.useIndirectThunkCalls())
3430 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3431 "code model and thunks not yet implemented.");
3432 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3433 .addReg(X86::RIP)
3434 .addImm(0)
3435 .addReg(0)
3436 .addExternalSymbol("__morestack_addr")
3437 .addReg(0);
3438 } else {
3439 if (Is64Bit)
3440 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3441 .addExternalSymbol("__morestack");
3442 else
3443 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3444 .addExternalSymbol("__morestack");
3445 }
3446
3447 if (IsNested)
3448 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3449 else
3450 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3451
3452 allocMBB->addSuccessor(&PrologueMBB);
3453
3454 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3455 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3456
3457#ifdef EXPENSIVE_CHECKS
3458 MF.verify();
3459#endif
3460}
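// For illustration only (a sketch, not authoritative output of this pass):
// on x86-64 Linux the split-stack prologue built above comes out roughly as
//
//   checkMBB:
//     lea  -StackSize(%rsp), %r11   # skipped for small frames, where %rsp
//     cmp  %fs:0x70, %r11           # itself is compared against the limit
//     ja   body                     # enough stacklet space: run the body
//   allocMBB:
//     mov  $StackSize, %r10
//     mov  $ArgumentStackSize, %r11
//     callq __morestack
//     ret                           # __morestack restarts the function body
//   body:
//     ...
//
// The scratch register and TLS offset shown are examples; both vary by
// target, as selected by GetScratchRegister() and the TlsReg/TlsOffset
// logic above.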
3461
3462/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3463/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3464/// to fields it needs, through a named metadata node "hipe.literals" containing
3465/// name-value pairs.
3466static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3467 const StringRef LiteralName) {
3468 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3469 MDNode *Node = HiPELiteralsMD->getOperand(i);
3470 if (Node->getNumOperands() != 2)
3471 continue;
3472 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3473 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3474 if (!NodeName || !NodeVal)
3475 continue;
3476 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3477 if (ValConst && NodeName->getString() == LiteralName) {
3478 return ValConst->getZExtValue();
3479 }
3480 }
3481
3482 report_fatal_error("HiPE literal " + LiteralName +
3483 " required but not provided");
3484}
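// For illustration: a module that wants the HiPE prologue provides the named
// metadata consumed by getHiPELiteral() in IR of roughly the following shape,
// one !{name, value} pair per literal (the values here are invented examples,
// not authoritative runtime parameters):
//
//   !hipe.literals = !{!0, !1, !2}
//   !0 = !{!"P_NSP_LIMIT", i32 152}
//   !1 = !{!"X86_LEAF_WORDS", i32 24}
//   !2 = !{!"AMD64_LEAF_WORDS", i32 24}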
3485
3486// Return true if there are no non-ehpad successors to MBB and there are no
3487// non-meta instructions between MBBI and MBB.end().
3488 static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3489 MachineBasicBlock::const_iterator MBBI) {
3490 return llvm::all_of(
3491 MBB.successors(),
3492 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3493 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3494 return MI.isMetaInstruction();
3495 });
3496}
3497
3498/// Erlang programs may need a special prologue to handle the stack size they
3499/// might need at runtime. That is because Erlang/OTP does not implement a C
3500 /// stack but uses a custom implementation of a hybrid stack/heap architecture.
3501/// (for more information see Eric Stenman's Ph.D. thesis:
3502/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3503///
3504/// CheckStack:
3505/// temp0 = sp - MaxStack
3506/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3507/// OldStart:
3508/// ...
3509/// IncStack:
3510/// call inc_stack # doubles the stack space
3511/// temp0 = sp - MaxStack
3512/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3513 void X86FrameLowering::adjustForHiPEPrologue(
3514 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3515 MachineFrameInfo &MFI = MF.getFrameInfo();
3516 DebugLoc DL;
3517
3518 // To support shrink-wrapping we would need to insert the new blocks
3519 // at the right place and update the branches to PrologueMBB.
3520 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3521
3522 // HiPE-specific values
3523 NamedMDNode *HiPELiteralsMD =
3524 MF.getMMI().getModule()->getNamedMetadata("hipe.literals");
3525 if (!HiPELiteralsMD)
3526 report_fatal_error(
3527 "Can't generate HiPE prologue without runtime parameters");
3528 const unsigned HipeLeafWords = getHiPELiteral(
3529 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3530 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3531 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3532 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3533 ? MF.getFunction().arg_size() - CCRegisteredArgs
3534 : 0;
3535 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3536
3537 assert(STI.isTargetLinux() &&
3538 "HiPE prologue is only supported on Linux operating systems.");
3539
3540 // Compute the largest caller's frame that is needed to fit the callees'
3541 // frames. This 'MaxStack' is computed from:
3542 //
3543 // a) the fixed frame size, which is the space needed for all spilled temps,
3544 // b) outgoing on-stack parameter areas, and
3545 // c) the minimum stack space this function needs to make available for the
3546 // functions it calls (a tunable ABI property).
3547 if (MFI.hasCalls()) {
3548 unsigned MoreStackForCalls = 0;
3549
3550 for (auto &MBB : MF) {
3551 for (auto &MI : MBB) {
3552 if (!MI.isCall())
3553 continue;
3554
3555 // Get callee operand.
3556 const MachineOperand &MO = MI.getOperand(0);
3557
3558 // Only take account of global function calls (no closures etc.).
3559 if (!MO.isGlobal())
3560 continue;
3561
3562 const Function *F = dyn_cast<Function>(MO.getGlobal());
3563 if (!F)
3564 continue;
3565
3566 // Do not update 'MaxStack' for primitive and built-in functions
3567 // (encoded with names either starting with "erlang."/"bif_" or not
3568 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3569 // "_", such as the BIF "suspend_0") as they are executed on another
3570 // stack.
3571 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3572 F->getName().find_first_of("._") == StringRef::npos)
3573 continue;
3574
3575 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3576 ? F->arg_size() - CCRegisteredArgs
3577 : 0;
3578 if (HipeLeafWords - 1 > CalleeStkArity)
3579 MoreStackForCalls =
3580 std::max(MoreStackForCalls,
3581 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3582 }
3583 }
3584 MaxStack += MoreStackForCalls;
3585 }
3586
3587 // If the stack frame needed is larger than the guaranteed amount, then runtime
3588 // checks and calls to the "inc_stack_0" BIF should be inserted in the assembly prologue.
3589 if (MaxStack > Guaranteed) {
3590 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3591 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3592
3593 for (const auto &LI : PrologueMBB.liveins()) {
3594 stackCheckMBB->addLiveIn(LI);
3595 incStackMBB->addLiveIn(LI);
3596 }
3597
3598 MF.push_front(incStackMBB);
3599 MF.push_front(stackCheckMBB);
3600
3601 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3602 unsigned LEAop, CMPop, CALLop;
3603 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3604 if (Is64Bit) {
3605 SPReg = X86::RSP;
3606 PReg = X86::RBP;
3607 LEAop = X86::LEA64r;
3608 CMPop = X86::CMP64rm;
3609 CALLop = X86::CALL64pcrel32;
3610 } else {
3611 SPReg = X86::ESP;
3612 PReg = X86::EBP;
3613 LEAop = X86::LEA32r;
3614 CMPop = X86::CMP32rm;
3615 CALLop = X86::CALLpcrel32;
3616 }
3617
3618 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3619 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3620 "HiPE prologue scratch register is live-in");
3621
3622 // Create new MBB for StackCheck:
3623 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3624 false, -MaxStack);
3625 // SPLimitOffset is in a fixed heap location (pointed by BP).
3626 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3627 PReg, false, SPLimitOffset);
3628 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3629 .addMBB(&PrologueMBB)
3630 .addImm(X86::COND_AE);
3631
3632 // Create new MBB for IncStack:
3633 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3634 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3635 false, -MaxStack);
3636 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3637 PReg, false, SPLimitOffset);
3638 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3639 .addMBB(incStackMBB)
3640 .addImm(X86::COND_LE);
3641
3642 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3643 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3644 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3645 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3646 }
3647#ifdef EXPENSIVE_CHECKS
3648 MF.verify();
3649#endif
3650}
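// For illustration (a sketch, not authoritative output): on a 64-bit target
// the two blocks created above expand to approximately
//
//   stackCheckMBB:
//     lea  -MaxStack(%rsp), %r14    # %r14: HiPE scratch register example
//     cmp  P_NSP_LIMIT(%rbp), %r14  # %rbp points at the Erlang process (P)
//     jae  prologue                 # enough space: fall into old prologue
//   incStackMBB:
//     call inc_stack_0              # doubles the stack space
//     lea  -MaxStack(%rsp), %r14
//     cmp  P_NSP_LIMIT(%rbp), %r14
//     jle  incStackMBB              # still too small: grow again
//
// where P_NSP_LIMIT stands for the SPLimitOffset looked up from the
// !hipe.literals metadata.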
3651
3652bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3653 MachineBasicBlock::iterator MBBI,
3654 const DebugLoc &DL,
3655 int Offset) const {
3656 if (Offset <= 0)
3657 return false;
3658
3659 if (Offset % SlotSize)
3660 return false;
3661
3662 int NumPops = Offset / SlotSize;
3663 // This is only worth it if we have at most 2 pops.
3664 if (NumPops != 1 && NumPops != 2)
3665 return false;
3666
3667 // Handle only the trivial case where the adjustment directly follows
3668 // a call. This is the most common one, anyway.
3669 if (MBBI == MBB.begin())
3670 return false;
3671 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3672 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3673 return false;
3674
3675 unsigned Regs[2];
3676 unsigned FoundRegs = 0;
3677
3678 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3679 const MachineOperand &RegMask = Prev->getOperand(1);
3680
3681 auto &RegClass =
3682 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3683 // Try to find up to NumPops free registers.
3684 for (auto Candidate : RegClass) {
3685 // Poor man's liveness:
3686 // Since we're immediately after a call, any register that is clobbered
3687 // by the call and not defined by it can be considered dead.
3688 if (!RegMask.clobbersPhysReg(Candidate))
3689 continue;
3690
3691 // Don't clobber reserved registers
3692 if (MRI.isReserved(Candidate))
3693 continue;
3694
3695 bool IsDef = false;
3696 for (const MachineOperand &MO : Prev->implicit_operands()) {
3697 if (MO.isReg() && MO.isDef() &&
3698 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3699 IsDef = true;
3700 break;
3701 }
3702 }
3703
3704 if (IsDef)
3705 continue;
3706
3707 Regs[FoundRegs++] = Candidate;
3708 if (FoundRegs == (unsigned)NumPops)
3709 break;
3710 }
3711
3712 if (FoundRegs == 0)
3713 return false;
3714
3715 // If we found only one free register, but need two, reuse the same one twice.
3716 while (FoundRegs < (unsigned)NumPops)
3717 Regs[FoundRegs++] = Regs[0];
3718
3719 for (int i = 0; i < NumPops; ++i)
3720 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3721 Regs[i]);
3722
3723 return true;
3724}
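// For illustration (byte counts approximate): under minsize this rewrites a
// post-call SP adjustment such as
//
//   call foo
//   add  $8, %esp        # 3 bytes (opcode + ModRM + imm8)
//
// into pops of registers the preceding call provably clobbered, e.g.
//
//   call foo
//   pop  %ecx            # 1 byte each
//   pop  %edx
//
// The popped values are dead; only the stack pointer side effect matters,
// and the pops encode smaller than the explicit add.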
3725
3726 MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3727 MachineFunction &MF, MachineBasicBlock &MBB,
3728 MachineBasicBlock::iterator I) const {
3729 bool reserveCallFrame = hasReservedCallFrame(MF);
3730 unsigned Opcode = I->getOpcode();
3731 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3732 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3733 uint64_t Amount = TII.getFrameSize(*I);
3734 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3735 I = MBB.erase(I);
3736 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3737
3738 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3739 // typically because the function is marked noreturn (abort, throw,
3740 // assert_fail, etc).
3741 if (isDestroy && blockEndIsUnreachable(MBB, I))
3742 return I;
3743
3744 if (!reserveCallFrame) {
3745 // If the stack pointer can be changed after prologue, turn the
3746 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3747 // adjcallstackdown instruction into 'add ESP, <amt>'
3748
3749 // We need to keep the stack aligned properly. To do this, we round the
3750 // amount of space needed for the outgoing arguments up to the next
3751 // alignment boundary.
3752 Amount = alignTo(Amount, getStackAlign());
3753
3754 const Function &F = MF.getFunction();
3755 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3756 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3757
3758 // If we have any exception handlers in this function, and we adjust
3759 // the SP before calls, we may need to indicate this to the unwinder
3760 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3761 // Amount == 0, because the preceding function may have set a non-0
3762 // GNU_ARGS_SIZE.
3763 // TODO: We don't need to reset this between subsequent functions,
3764 // if it didn't change.
3765 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3766
3767 if (HasDwarfEHHandlers && !isDestroy &&
3768 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3769 BuildCFI(MBB, InsertPos, DL,
3770 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3771
3772 if (Amount == 0)
3773 return I;
3774
3775 // Factor out the amount that gets handled inside the sequence
3776 // (Pushes of argument for frame setup, callee pops for frame destroy)
3777 Amount -= InternalAmt;
3778
3779 // TODO: This is needed only if we require precise CFA.
3780 // If this is a callee-pop calling convention, emit a CFA adjust for
3781 // the amount the callee popped.
3782 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3783 BuildCFI(MBB, InsertPos, DL,
3784 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3785
3786 // Add Amount to SP to destroy a frame, or subtract to setup.
3787 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3788
3789 if (StackAdjustment) {
3790 // Merge with any previous or following adjustment instruction. Note: the
3791 // instructions merged with here do not have CFI, so their stack
3792 // adjustments do not feed into CfaAdjustment.
3793 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3794 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3795
3796 if (StackAdjustment) {
3797 if (!(F.hasMinSize() &&
3798 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3799 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3800 /*InEpilogue=*/false);
3801 }
3802 }
3803
3804 if (DwarfCFI && !hasFP(MF)) {
3805 // If we don't have FP, but need to generate unwind information,
3806 // we need to set the correct CFA offset after the stack adjustment.
3807 // How much we adjust the CFA offset depends on whether we're emitting
3808 // CFI only for EH purposes or for debugging. EH only requires the CFA
3809 // offset to be correct at each call site, while for debugging we want
3810 // it to be more precise.
3811
3812 int64_t CfaAdjustment = -StackAdjustment;
3813 // TODO: When not using precise CFA, we also need to adjust for the
3814 // InternalAmt here.
3815 if (CfaAdjustment) {
3816 BuildCFI(
3817 MBB, InsertPos, DL,
3818 MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3819 }
3820 }
3821
3822 return I;
3823 }
3824
3825 if (InternalAmt) {
3826 MachineBasicBlock::iterator CI = I;
3827 MachineBasicBlock::iterator B = MBB.begin();
3828 while (CI != B && !std::prev(CI)->isCall())
3829 --CI;
3830 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3831 }
3832
3833 return I;
3834}
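// For illustration (operand shapes are a sketch): with no reserved call
// frame, the pseudos bracketing a call with 20 bytes of outgoing arguments
// and 16-byte stack alignment lower to explicit SP adjustments:
//
//   ADJCALLSTACKDOWN64 20, 0, 0   -->   sub $32, %rsp
//   CALL64pcrel32 @callee               call callee
//   ADJCALLSTACKUP64 20, 0        -->   add $32, %rsp
//
// where 32 = alignTo(20, 16). Adjacent adjustments are merged first via
// mergeSPUpdates(), and under minsize the add may become pops as above.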
3835
3836 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3837 assert(MBB.getParent() && "Block is not attached to a function!");
3838 const MachineFunction &MF = *MBB.getParent();
3839 if (!MBB.isLiveIn(X86::EFLAGS))
3840 return true;
3841
3842 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3843 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3844 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3845 const X86TargetLowering &TLI = *STI.getTargetLowering();
3846 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3847 return false;
3848
3849 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3850 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3851}
3852
3853 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3854 assert(MBB.getParent() && "Block is not attached to a function!");
3855
3856 // Win64 has strict requirements in terms of epilogue and we are
3857 // not taking a chance at messing with them.
3858 // I.e., unless this block is already an exit block, we can't use
3859 // it as an epilogue.
3860 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3861 return false;
3862
3863 // Swift async context epilogue has a BTR instruction that clobbers parts of
3864 // EFLAGS.
3865 const MachineFunction &MF = *MBB.getParent();
3866 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3867 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3868
3869 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3870 return true;
3871
3872 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3873 // clobbers the EFLAGS. Check that we do not need to preserve it,
3874 // otherwise, conservatively assume this is not
3875 // safe to insert the epilogue here.
3876 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3877}
3878
3879 bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3880 // If we may need to emit frameless compact unwind information, give
3881 // up as this is currently broken: PR25614.
3882 bool CompactUnwind =
3883 MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
3884 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3885 !CompactUnwind) &&
3886 // The lowering of segmented stack and HiPE only support entry
3887 // blocks as prologue blocks: PR26107. This limitation may be
3888 // lifted if we fix:
3889 // - adjustForSegmentedStacks
3890 // - adjustForHiPEPrologue
3891 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3892 !MF.shouldSplitStack();
3893}
3894
3895 MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3896 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3897 const DebugLoc &DL, bool RestoreSP) const {
3898 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3899 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3900 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3901 "restoring EBP/ESI on non-32-bit target");
3902
3903 MachineFunction &MF = *MBB.getParent();
3904 Register FramePtr = TRI->getFrameRegister(MF);
3905 Register BasePtr = TRI->getBaseRegister();
3906 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3907 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3908 MachineFrameInfo &MFI = MF.getFrameInfo();
3909
3910 // FIXME: Don't set FrameSetup flag in catchret case.
3911
3912 int FI = FuncInfo.EHRegNodeFrameIndex;
3913 int EHRegSize = MFI.getObjectSize(FI);
3914
3915 if (RestoreSP) {
3916 // MOV32rm -EHRegSize(%ebp), %esp
3917 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3918 X86::EBP, true, -EHRegSize)
3919 .setMIFlag(MachineInstr::FrameSetup);
3920 }
3921
3922 Register UsedReg;
3923 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3924 int EndOffset = -EHRegOffset - EHRegSize;
3925 FuncInfo.EHRegNodeEndOffset = EndOffset;
3926
3927 if (UsedReg == FramePtr) {
3928 // ADD $offset, %ebp
3929 unsigned ADDri = getADDriOpcode(false);
3930 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3931 .addReg(FramePtr)
3932 .addImm(EndOffset)
3933 .setMIFlag(MachineInstr::FrameSetup)
3934 ->getOperand(3)
3935 .setIsDead();
3936 assert(EndOffset >= 0 &&
3937 "end of registration object above normal EBP position!");
3938 } else if (UsedReg == BasePtr) {
3939 // LEA offset(%ebp), %esi
3940 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3941 FramePtr, false, EndOffset)
3942 .setMIFlag(MachineInstr::FrameSetup);
3943 // MOV32rm SavedEBPOffset(%esi), %ebp
3944 assert(X86FI->getHasSEHFramePtrSave());
3945 int Offset =
3946 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3947 .getFixed();
3948 assert(UsedReg == BasePtr);
3949 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3950 UsedReg, true, Offset)
3951 .setMIFlag(MachineInstr::FrameSetup);
3952 } else {
3953 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3954 }
3955 return MBBI;
3956}
3957
3958 int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3959 return TRI->getSlotSize();
3960}
3961
3962 Register
3963 X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3964 return StackPtr;
3965}
3966
3967 TargetFrameLowering::DwarfFrameBase
3968 X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
3969 const X86RegisterInfo *RI = STI.getRegisterInfo();
3970 Register FrameRegister = RI->getFrameRegister(MF);
3971 if (getInitialCFARegister(MF) == FrameRegister &&
3972 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
3973 DwarfFrameBase FrameBase;
3974 FrameBase.Kind = DwarfFrameBase::CFA;
3975 FrameBase.Location.Offset =
3976 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
3977 return FrameBase;
3978 }
3979
3980 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
3981}
3982
3983namespace {
3984// Struct used by orderFrameObjects to help sort the stack objects.
3985struct X86FrameSortingObject {
3986 bool IsValid = false; // true if we care about this Object.
3987 unsigned ObjectIndex = 0; // Index of Object into MFI list.
3988 unsigned ObjectSize = 0; // Size of Object in bytes.
3989 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3990 unsigned ObjectNumUses = 0; // Object static number of uses.
3991};
3992
3993// The comparison function we use for std::sort to order our local
3994// stack symbols. The current algorithm is to use an estimated
3995// "density". This takes into consideration the size and number of
3996// uses each object has in order to roughly minimize code size.
3997// So, for example, an object of size 16B that is referenced 5 times
3998// will get higher priority than 4 4B objects referenced 1 time each.
3999// It's not perfect and we may be able to squeeze a few more bytes out of
4000 // it (for example: 0(esp) requires fewer bytes, symbols allocated at the
4001// fringe end can have special consideration, given their size is less
4002// important, etc.), but the algorithmic complexity grows too much to be
4003// worth the extra gains we get. This gets us pretty close.
4004// The final order leaves us with objects with highest priority going
4005// at the end of our list.
4006struct X86FrameSortingComparator {
4007 inline bool operator()(const X86FrameSortingObject &A,
4008 const X86FrameSortingObject &B) const {
4009 uint64_t DensityAScaled, DensityBScaled;
4010
4011 // For consistency in our comparison, all invalid objects are placed
4012 // at the end. This also allows us to stop walking when we hit the
4013 // first invalid item after it's all sorted.
4014 if (!A.IsValid)
4015 return false;
4016 if (!B.IsValid)
4017 return true;
4018
4019 // The density is calculated by doing :
4020 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4021 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4022 // Since this approach may cause inconsistencies in
4023 // the floating point <, >, == comparisons, depending on the floating
4024 // point model with which the compiler was built, we're going
4025 // to scale both sides by multiplying with
4026 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4027 // the division and, with it, the need for any floating point
4028 // arithmetic.
4029 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4030 static_cast<uint64_t>(B.ObjectSize);
4031 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4032 static_cast<uint64_t>(A.ObjectSize);
4033
4034 // If the two densities are equal, prioritize highest alignment
4035 // objects. This allows for similar alignment objects
4036 // to be packed together (given the same density).
4037 // There's room for improvement here, also, since we can pack
4038 // similar alignment (different density) objects next to each
4039 // other to save padding. This will also require further
4040 // complexity/iterations, and the overall gain isn't worth it,
4041 // in general. Something to keep in mind, though.
4042 if (DensityAScaled == DensityBScaled)
4043 return A.ObjectAlignment < B.ObjectAlignment;
4044
4045 return DensityAScaled < DensityBScaled;
4046 }
4047};
4048} // namespace
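// Worked example (numbers invented for illustration): take A with
// ObjectSize = 16, ObjectNumUses = 5 and B with ObjectSize = 4,
// ObjectNumUses = 1. Then
//
//   DensityAScaled = 5 * 4  = 20   // A.ObjectNumUses * B.ObjectSize
//   DensityBScaled = 1 * 16 = 16   // B.ObjectNumUses * A.ObjectSize
//
// so operator()(A, B) is (20 < 16) == false while operator()(B, A) is
// (16 < 20) == true; B sorts before A and the denser object A ends up at
// the end of the list, the same outcome as comparing 5/16 against 1/4 in
// floating point but using integer arithmetic only.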
4049
4050// Order the symbols in the local stack.
4051// We want to place the local stack objects in some sort of sensible order.
4052// The heuristic we use is to try and pack them according to static number
4053// of uses and size of object in order to minimize code size.
4054 void X86FrameLowering::orderFrameObjects(
4055 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4056 const MachineFrameInfo &MFI = MF.getFrameInfo();
4057
4058 // Don't waste time if there's nothing to do.
4059 if (ObjectsToAllocate.empty())
4060 return;
4061
4062 // Create an array of all MFI objects. We won't need all of these
4063 // objects, but we're going to create a full array of them to make
4064 // it easier to index into when we're counting "uses" down below.
4065 // We want to be able to easily/cheaply access an object by simply
4066 // indexing into it, instead of having to search for it every time.
4067 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4068
4069 // Walk the objects we care about and mark them as such in our working
4070 // struct.
4071 for (auto &Obj : ObjectsToAllocate) {
4072 SortingObjects[Obj].IsValid = true;
4073 SortingObjects[Obj].ObjectIndex = Obj;
4074 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4075 // Set the size.
4076 int ObjectSize = MFI.getObjectSize(Obj);
4077 if (ObjectSize == 0)
4078 // Variable size. Just use 4.
4079 SortingObjects[Obj].ObjectSize = 4;
4080 else
4081 SortingObjects[Obj].ObjectSize = ObjectSize;
4082 }
4083
4084 // Count the number of uses for each object.
4085 for (auto &MBB : MF) {
4086 for (auto &MI : MBB) {
4087 if (MI.isDebugInstr())
4088 continue;
4089 for (const MachineOperand &MO : MI.operands()) {
4090 // Check to see if it's a local stack symbol.
4091 if (!MO.isFI())
4092 continue;
4093 int Index = MO.getIndex();
4094 // Check to see if it falls within our range, and is tagged
4095 // to require ordering.
4096 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4097 SortingObjects[Index].IsValid)
4098 SortingObjects[Index].ObjectNumUses++;
4099 }
4100 }
4101 }
4102
4103 // Sort the objects using X86FrameSortingAlgorithm (see its comment for
4104 // info).
4105 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4106
4107 // Now modify the original list to represent the final order that
4108 // we want. The order will depend on whether we're going to access them
4109 // from the stack pointer or the frame pointer. For SP, the list should
4110 // end up with the END containing objects that we want with smaller offsets.
4111 // For FP, it should be flipped.
4112 int i = 0;
4113 for (auto &Obj : SortingObjects) {
4114 // All invalid items are sorted at the end, so it's safe to stop.
4115 if (!Obj.IsValid)
4116 break;
4117 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4118 }
4119
4120 // Flip it if we're accessing off of the FP.
4121 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4122 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4123}
4124
4125unsigned
4126 X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4127 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4128 unsigned Offset = 16;
4129 // RBP is immediately pushed.
4130 Offset += SlotSize;
4131 // All callee-saved registers are then pushed.
4132 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4133 // Every funclet allocates enough stack space for the largest outgoing call.
4134 Offset += getWinEHFuncletFrameSize(MF);
4135 return Offset;
4136}
4137
4138 void X86FrameLowering::processFunctionBeforeFrameFinalized(
4139 MachineFunction &MF, RegScavenger *RS) const {
4140 // Mark the function as not having WinCFI. We will set it back to true in
4141 // emitPrologue if it gets called and emits CFI.
4142 MF.setHasWinCFI(false);
4143
4144 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4145 // aligned. The format doesn't support misaligned stack adjustments.
4146 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4147 MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
4148
4149 // If this function isn't doing Win64-style C++ EH, we don't need to do
4150 // anything.
4151 if (STI.is64Bit() && MF.hasEHFunclets() &&
4152 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4153 EHPersonality::MSVC_CXX) {
4154 adjustFrameForMsvcCxxEh(MF);
4155 }
4156}
4157
4158void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4159 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4160 // relative to RSP after the prologue. Find the offset of the last fixed
4161 // object, so that we can allocate a slot immediately following it. If there
4162 // were no fixed objects, use offset -SlotSize, which is immediately after the
4163 // return address. Fixed objects have negative frame indices.
4164 MachineFrameInfo &MFI = MF.getFrameInfo();
4165 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4166 int64_t MinFixedObjOffset = -SlotSize;
4167 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4168 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4169
4170 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4171 for (WinEHHandlerType &H : TBME.HandlerArray) {
4172 int FrameIndex = H.CatchObj.FrameIndex;
4173 if (FrameIndex != INT_MAX) {
4174 // Ensure alignment.
4175 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4176 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4177 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4178 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4179 }
4180 }
4181 }
4182
4183 // Ensure alignment.
4184 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4185 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4186 int UnwindHelpFI =
4187 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4188 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4189
4190 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4191 // other frame setup instructions.
4192 MachineBasicBlock &MBB = MF.front();
4193 auto MBBI = MBB.begin();
4194 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4195 ++MBBI;
4196
4197 DebugLoc DL = MBB.findDebugLoc(MBBI);
4198 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4199 UnwindHelpFI)
4200 .addImm(-2);
4201}
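// Worked example (offsets invented for illustration): suppose the lowest
// fixed object ends at MinFixedObjOffset = -40 and one catch object of size
// 8 with 8-byte alignment is placed. The loop above computes
//
//   MinFixedObjOffset -= std::abs(-40) % 8;  // still -40, already aligned
//   MinFixedObjOffset -= 8;                  // -48, the catch object slot
//
// after which UnwindHelp is created at -48 - SlotSize = -56 and initialized
// to -2 at function entry by the MOV64mi32 emitted above.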
4202
4203 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
4204 MachineFunction &MF, RegScavenger *RS) const {
4205 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4206
4207 if (STI.is32Bit() && MF.hasEHFunclets())
4208 restoreWinEHStackPointersInParent(MF);
4209 // We have emitted the prolog and epilog, so the stack pointer saving
4210 // instruction is no longer needed.
4211 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4212 MI->eraseFromParent();
4213 X86FI->setStackPtrSaveMI(nullptr);
4214 }
4215}
4216
4217 void X86FrameLowering::restoreWinEHStackPointersInParent(
4218 MachineFunction &MF) const {
4219 // 32-bit functions have to restore stack pointers when control is transferred
4220 // back to the parent function. These blocks are identified as eh pads that
4221 // are not funclet entries.
4222 bool IsSEH = isAsynchronousEHPersonality(
4223 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4224 for (MachineBasicBlock &MBB : MF) {
4225 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4226 if (NeedsRestore)
4227 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4228 /*RestoreSP=*/IsSEH);
4229 }
4230}
Definition: X86Subtarget.h:178
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP,...
Definition: X86Subtarget.h:386
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:304
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:337
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:281
bool isTargetNaCl64() const
Definition: X86Subtarget.h:296
bool isTargetWin32() const
Definition: X86Subtarget.h:326
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:218
bool isTargetLinux() const
Definition: X86Subtarget.h:290
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
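A hedged sketch of how a prologue might consult these hooks together; StackSize and the body are placeholders, not the actual control flow:
// Only route allocation through a probe when a probe symbol exists and
// the frame outgrows one probe interval (typically 4096 bytes).
if (hasStackProbeSymbol(MF) && StackSize >= getStackProbeSize(MF)) {
  // e.g. call the probe function named by getStackProbeSymbolName(MF),
  // or expand inline when hasInlineStackProbe(MF) is true.
}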
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
CallingConvention
Definition: Dwarf.h:738
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
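For example, a frame-lowering pass might assert a property over the callee-saved list (CSI assumed to be a populated std::vector<CalleeSavedInfo>, and the slot criterion is illustrative):
bool AllHaveSlots = all_of(CSI, [](const CalleeSavedInfo &I) {
  return I.getFrameIdx() >= 0; // every CSR was given a spill slot
});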
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
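A sketch of the canonical use (this overload and the MBB-taking ones compose with the add* helpers listed below); here the classic frame-pointer setup, with MBB, MBBI, DL and TII assumed in scope:
// rbp = rsp, flagged as part of the prologue for the unwinder.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), X86::RBP)
    .addReg(X86::RSP)
    .setMIFlag(MachineInstr::FrameSetup);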
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
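For instance:
assert(isAligned(Align(16), 32) && !isAligned(Align(16), 24));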
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
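For example, mapping between register widths (Size is in bits):
// getX86SubSuperRegister(X86::RAX, 32) yields X86::EAX;
// getX86SubSuperRegister(X86::RAX, 8, /*High=*/true) yields X86::AH.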
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
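For example, visiting only the body of a block up to its first terminator (NumSetup is a hypothetical counter):
for (MachineInstr &MI : make_range(MBB.begin(), MBB.getFirstTerminator()))
  if (MI.getFlag(MachineInstr::FrameSetup))
    ++NumSetup;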
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
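A sketch of the usual pattern, reloading a 64-bit GPR from frame index FI (opcode choice assumed; MBB, MI, DL, TII and DestReg in scope):
addFrameReference(
    BuildMI(MBB, MI, DL, TII.get(X86::MOV64rm), DestReg), FI);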
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:547
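For instance:
// alignDown(37, 8) == 32; with a skew, alignDown(37, 8, 3) == 35
// (35 is the largest value <= 37 that is congruent to 3 mod 8).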
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
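A sketch of the common frame-lowering use, forming an SP-relative address with LEA (DestReg and Offset assumed in scope):
// DestReg = rsp + Offset, without touching EFLAGS.
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), DestReg),
             X86::RSP, /*isKill=*/false, Offset);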
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
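For example, finding the last real instruction of a non-empty block before deciding whether it can host an epilogue (a sketch; the block must not be empty):
MachineBasicBlock::iterator LastReal =
    skipDebugInstructionsBackward(std::prev(MBB.end()), MBB.begin());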
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
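These helpers build the flag word for addReg; for example, spilling a callee-saved register with a push that ends the register's live range:
BuildMI(MBB, MI, DL, TII.get(X86::PUSH64r))
    .addReg(Reg, getKillRegState(true))
    .setMIFlag(MachineInstr::FrameSetup);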
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
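For instance:
// alignTo(37, Align(8)) == 40; already-aligned values are returned unchanged.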
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
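For example, counting the XMM registers in a callee-saved list (CSI assumed, as above):
unsigned NumXMM = count_if(CSI, [](const CalleeSavedInfo &I) {
  return X86::VR128RegClass.contains(I.getReg());
});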
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
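A sketch covering both encoders above; the byte values follow from the LEB128 definition:
SmallString<8> Buf;
raw_svector_ostream OS(Buf);
encodeSLEB128(-2, OS);     // one byte: 0x7E
encodeULEB128(624485, OS); // three bytes: 0xE5 0x8E 0x26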
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
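A sketch of pairing it with MachineFunction::getMachineMemOperand when storing to frame index FI:
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
    MF.getFrameInfo().getObjectSize(FI), MF.getFrameInfo().getObjectAlign(FI));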
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
union llvm::TargetFrameLowering::DwarfFrameBase::@241 Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76