//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}

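// Illustrative selections by getMOVriOpcode above (64-bit register requested):
// Imm = 0xFFFFFFFF fits in unsigned 32 bits -> MOV32ri64 (implicit
// zero-extension); Imm = -1 fits in signed 32 bits -> MOV64ri32
// (sign-extended); Imm = 1LL << 40 needs the full 64-bit MOV64ri.
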
// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching PUSH
// and POP instructions, without going through memory or through the training
// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
// memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}

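// For example (illustrative, on a PPX-capable 64-bit subtarget): the helpers
// above turn a balanced frame-pointer spill/reload pair into
//   pushp %rbp  ...  popp %rbp
// and, when PUSH2/POP2 are selected, two adjacent GPR spills collapse into a
// single push2/pop2 instruction each way.
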
static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    unsigned Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case if EFLAGS is live-in of the region composed
/// by the terminators, or live-out of that region without being defined
/// by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an EFLAGS value that is not defined
      // by a previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}

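// Example for the predicate above: a block ending in "JCC_1 ...; JMP_1 ..."
// reads EFLAGS in its terminator region without defining it, so the flags
// must be preserved. If no terminator touches EFLAGS at all, only a successor
// with EFLAGS live-in forces preservation.
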
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);

    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}

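// Illustrative expansions of emitSPUpdate (64-bit, prologue direction):
//   NumBytes = -8          -> pushq %rax         (slot-sized, RAX not live-in)
//   NumBytes = -4096       -> subq  $4096, %rsp
//   NumBytes = -0xC0000000 -> movl  $0xC0000000, %eax; subq %rax, %rsp
//                             (offset larger than the 2^31-1 chunk)
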
MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

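// E.g. when EFLAGS must stay intact (or the subtarget prefers LEA for SP
// updates), "subq $40, %rsp" is emitted instead as "leaq -40(%rsp), %rsp",
// which performs the same adjustment without touching EFLAGS.
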
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}

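// For example, if the instruction preceding MBBI is "subq $16, %rsp",
// mergeSPUpdates erases it (and its lone CFA-offset CFI, if present) and
// returns -16 so the caller can fold the adjustment into its own SP update.
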
void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        // | retaddr |
        // | ebp     |
        // |         |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bytes are
  // left unprobed between the unaligned rsp and the current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small it fits within a page, there's nothing to do.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust Dwarf CFA offset here, the last position of the stack
  // has been defined.
}

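// Illustrative unrolled expansion of the block above for Offset = 0x2800,
// AlignOffset = 0 and a 0x1000-byte probe size:
//   subq $0x1000, %rsp ; movl $0, (%rsp)   <- first probed page
//   subq $0x1000, %rsp ; movl $0, (%rsp)   <- second probed page
//   subq $0x800, %rsp                      <- tail smaller than a page
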
void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop.
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);

  // save loop bound
  {
    const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
        .addReg(FinalStackProbed)
        .addImm(BoundOffset)
        .setMIFlag(MachineInstr::FrameSetup);

    // while in the loop, use loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d, we must use r11 instead
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // allocate a page
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // handle tail
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // after the loop, switch back to stack pointer for CFI
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp, we must use rsp instead
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update Live In information
  fullyRecomputeLiveIns({tailMBB, testMBB});
}

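// Shape of the loop emitted above (64-bit, illustrative):
//   movq %rsp, %r11
//   subq $BoundOffset, %r11      # BoundOffset = Offset rounded down to a
// .LtestMBB:                     #   StackProbeSize multiple
//   subq $StackProbeSize, %rsp
//   movl $0, (%rsp)              # touch the page
//   cmpq %r11, %rsp
//   jne  .LtestMBB
//   subq $TailOffset, %rsp       # TailOffset = Offset % StackProbeSize
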
void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");

  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if both
    // need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  ContinueMBB->addLiveIn(SizeReg);
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && IsLargeCodeModel) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}

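// Illustrative Win64 expansion of the call above: the caller has materialized
// the allocation size in RAX beforehand, and the emitted sequence is
//   callq __chkstk
//   subq  %rax, %rsp
// since the Win64 probe leaves RSP unmodified and does not clobber RAX.
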
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}

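// Worked examples for calculateSetFPREG: SPAdjust = 40 gives
// min(40, 128) = 40 and 40 & -16 = 32; SPAdjust = 500 gives
// min(500, 128) = 128, which is already 16-byte aligned.
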
// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in the worst case) fewer than StackProbeSize
  // bytes are left unprobed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump to the footer if StackPtr < FinalStackProbed
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // setup loop body
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // cmp with stack pointer bound
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump back while FinalStackProbed < StackPtr
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}

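// In the common case (no inline probing needed), the realignment above is a
// single instruction, e.g. "andq $-64, %rsp" for MaxAlign = 64.
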
bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub %rax, %rsp
  [else]
      sub $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

1528 MachineBasicBlock &MBB) const {
1529 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1530 "MF used frame lowering for wrong subtarget");
1532 MachineFrameInfo &MFI = MF.getFrameInfo();
1533 const Function &Fn = MF.getFunction();
1535 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1536 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1537 bool IsFunclet = MBB.isEHFuncletEntry();
1539 if (Fn.hasPersonalityFn())
1540 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1541 bool FnHasClrFunclet =
1542 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1543 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1544 bool HasFP = hasFP(MF);
1545 bool IsWin64Prologue = isWin64Prologue(MF);
1546 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1547 // FIXME: Emit FPO data for EH funclets.
1548 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1550 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1551 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1553 const Register MachineFramePtr =
1555 : FramePtr;
1556 Register BasePtr = TRI->getBaseRegister();
1557 bool HasWinCFI = false;
1558
1559 // Debug location must be unknown since the first debug location is used
1560 // to determine the end of the prologue.
1561 DebugLoc DL;
1562 Register ArgBaseReg;
1563
1564 // Emit extra prolog for argument stack slot reference.
1565 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1566 // MI is lea instruction that created in X86ArgumentStackSlotPass.
1567 // Creat extra prolog for stack realignment.
1568 ArgBaseReg = MI->getOperand(0).getReg();
1569 // leal 4(%esp), %basereg
1570 // .cfi_def_cfa %basereg, 0
1571 // andl $-128, %esp
1572 // pushl -4(%basereg)
1573 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1574 ArgBaseReg)
1576 .addImm(1)
1577 .addUse(X86::NoRegister)
1579 .addUse(X86::NoRegister)
1581 if (NeedsDwarfCFI) {
1582 // .cfi_def_cfa %basereg, 0
1583 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1584 BuildCFI(MBB, MBBI, DL,
1585 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1587 }
1588 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1589 int64_t Offset = -(int64_t)SlotSize;
1590 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1591 .addReg(ArgBaseReg)
1592 .addImm(1)
1593 .addReg(X86::NoRegister)
1594 .addImm(Offset)
1595 .addReg(X86::NoRegister)
1597 }
1598
1599 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1600 // tail call.
1601 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1602 if (TailCallArgReserveSize && IsWin64Prologue)
1603 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1604
1605 const bool EmitStackProbeCall =
1607 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1608
1609 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1613 // The special symbol below is absolute and has a *value* suitable to be
1614 // combined with the frame pointer directly.
1615 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1616 .addUse(MachineFramePtr)
1617 .addUse(X86::RIP)
1618 .addImm(1)
1619 .addUse(X86::NoRegister)
1620 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1622 .addUse(X86::NoRegister);
1623 break;
1624 }
1625 [[fallthrough]];
1626
1628 assert(
1629 !IsWin64Prologue &&
1630 "win64 prologue does not set the bit 60 in the saved frame pointer");
1631 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1632 .addUse(MachineFramePtr)
1633 .addImm(60)
1635 break;
1636
1638 break;
1639 }
1640 }
1641
1642 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1643 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1644 // stack alignment.
1646 Fn.arg_size() == 2) {
1647 StackSize += 8;
1648 MFI.setStackSize(StackSize);
1649
1650 // Update the stack pointer by pushing a register. This is the instruction
1651 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1652 // Hard-coding the update to a push avoids emitting a second
1653 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1654 // probing isn't needed anyways for an 8-byte update.
1655 // Pushing a register leaves us in a similar situation to a regular
1656 // function call where we know that the address at (rsp-8) is writeable.
1657 // That way we avoid any off-by-ones with stack probing for additional
1658 // stack pointer updates later on.
1659 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1660 .addReg(X86::RAX, RegState::Undef)
1662 }
1663
1664 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1665 // function, and use up to 128 bytes of stack space, don't have a frame
1666 // pointer, calls, or dynamic alloca then we do not need to adjust the
1667 // stack pointer (we fit in the Red Zone). We also check that we don't
1668 // push and pop from the stack.
1669 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1670 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1671 !MFI.adjustsStack() && // No calls.
1672 !EmitStackProbeCall && // No stack probes.
1673 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1674 !MF.shouldSplitStack()) { // Regular stack
1675 uint64_t MinSize =
1677 if (HasFP)
1678 MinSize += SlotSize;
1679 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1680 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1681 MFI.setStackSize(StackSize);
1682 }
1683
1684 // Insert stack pointer adjustment for later moving of return addr. Only
1685 // applies to tail call optimized functions where the callee argument stack
1686 // size is bigger than the callers.
1687 if (TailCallArgReserveSize != 0) {
1688 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1689 /*InEpilogue=*/false)
1691 }
1692
1693 // Mapping for machine moves:
1694 //
1695 // DST: VirtualFP AND
1696 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1697 // ELSE => DW_CFA_def_cfa
1698 //
1699 // SRC: VirtualFP AND
1700 // DST: Register => DW_CFA_def_cfa_register
1701 //
1702 // ELSE
1703 // OFFSET < 0 => DW_CFA_offset_extended_sf
1704 // REG < 64 => DW_CFA_offset + Reg
1705 // ELSE => DW_CFA_offset_extended
1706
1707 uint64_t NumBytes = 0;
1708 int stackGrowth = -SlotSize;
1709
1710 // Find the funclet establisher parameter
1711 Register Establisher = X86::NoRegister;
1712 if (IsClrFunclet)
1713 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1714 else if (IsFunclet)
1715 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1716
1717 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1718 // Immediately spill establisher into the home slot.
1719 // The runtime cares about this.
1720 // MOV64mr %rdx, 16(%rsp)
1721 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1722 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1723 .addReg(Establisher)
1725 MBB.addLiveIn(Establisher);
1726 }
1727
1728 if (HasFP) {
1729 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1730
1731 // Calculate required stack adjustment.
1732 uint64_t FrameSize = StackSize - SlotSize;
1733 NumBytes =
1734 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1735
1736 // Callee-saved registers are pushed on stack before the stack is realigned.
1737 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1738 NumBytes = alignTo(NumBytes, MaxAlign);
1739
1740 // Save EBP/RBP into the appropriate stack slot.
1741 BuildMI(MBB, MBBI, DL,
1743 .addReg(MachineFramePtr, RegState::Kill)
1745
1746 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1747 // Mark the place where EBP/RBP was saved.
1748 // Define the current CFA rule to use the provided offset.
1749 assert(StackSize);
1750 BuildCFI(MBB, MBBI, DL,
1752 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1754
1755 // Change the rule for the FramePtr to be an "offset" rule.
1756 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1757 BuildCFI(MBB, MBBI, DL,
1758 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1759 2 * stackGrowth -
1760 (int)TailCallArgReserveSize),
1762 }
1763
1764 if (NeedsWinCFI) {
1765 HasWinCFI = true;
1766 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1769 }
1770
1771 if (!IsFunclet) {
1772 if (X86FI->hasSwiftAsyncContext()) {
1773 assert(!IsWin64Prologue &&
1774 "win64 prologue does not store async context right below rbp");
1775 const auto &Attrs = MF.getFunction().getAttributes();
1776
1777 // Before we update the live frame pointer we have to ensure there's a
1778 // valid (or null) asynchronous context in its slot just before FP in
1779 // the frame record, so store it now.
1780 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1781 // We have an initial context in r14, store it just before the frame
1782 // pointer.
1783 MBB.addLiveIn(X86::R14);
1784 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1785 .addReg(X86::R14)
1786 .setMIFlag(MachineInstr::FrameSetup);
1787 } else {
1788 // No initial context, store null so that there's no pointer that
1789 // could be misused.
1790 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1791 .addImm(0)
1792 .setMIFlag(MachineInstr::FrameSetup);
1793 }
1794
1795 if (NeedsWinCFI) {
1796 HasWinCFI = true;
1797 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1798 .addImm(X86::R14)
1799 .setMIFlag(MachineInstr::FrameSetup);
1800 }
1801
1802 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1803 .addUse(X86::RSP)
1804 .addImm(1)
1805 .addUse(X86::NoRegister)
1806 .addImm(8)
1807 .addUse(X86::NoRegister)
1808 .setMIFlag(MachineInstr::FrameSetup);
1809 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1810 .addUse(X86::RSP)
1811 .addImm(8)
1812 .setMIFlag(MachineInstr::FrameSetup);
1813 }
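// The frame record is now [saved %rbp][async context] with %rbp re-pointed
// at the saved-%rbp slot; the extra 8-byte SUB keeps %rsp 16-byte aligned
// after the odd number of pushes.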
1814
1815 if (!IsWin64Prologue && !IsFunclet) {
1816 // Update EBP with the new base value.
1817 if (!X86FI->hasSwiftAsyncContext())
1818 BuildMI(MBB, MBBI, DL,
1819 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1820 FramePtr)
1821 .addReg(StackPtr)
1822 .setMIFlag(MachineInstr::FrameSetup);
1823
1824 if (NeedsDwarfCFI) {
1825 if (ArgBaseReg.isValid()) {
1826 SmallString<64> CfaExpr;
1827 CfaExpr.push_back(dwarf::DW_CFA_expression);
1828 uint8_t buffer[16];
1829 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1830 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1831 CfaExpr.push_back(2);
1832 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1833 CfaExpr.push_back(0);
1834 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1835 BuildCFI(MBB, MBBI, DL,
1836 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1837 MachineInstr::FrameSetup);
1838 } else {
1839 // Mark effective beginning of when frame pointer becomes valid.
1840 // Define the current CFA to use the EBP/RBP register.
1841 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1842 BuildCFI(
1843 MBB, MBBI, DL,
1844 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1845 MachineInstr::FrameSetup);
1846 }
1847 }
1848
1849 if (NeedsWinFPO) {
1850 // .cv_fpo_setframe $FramePtr
1851 HasWinCFI = true;
1852 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1853 .addImm(FramePtr)
1854 .addImm(0)
1855 .setMIFlag(MachineInstr::FrameSetup);
1856 }
1857 }
1858 }
1859 } else {
1860 assert(!IsFunclet && "funclets without FPs not yet implemented");
1861 NumBytes =
1862 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1863 }
1864
1865 // Update the offset adjustment, which is mainly used by codeview to translate
1866 // from ESP to VFRAME relative local variable offsets.
1867 if (!IsFunclet) {
1868 if (HasFP && TRI->hasStackRealignment(MF))
1869 MFI.setOffsetAdjustment(-NumBytes);
1870 else
1871 MFI.setOffsetAdjustment(-StackSize);
1872 }
1873
1874 // For EH funclets, only allocate enough space for outgoing calls. Save the
1875 // NumBytes value that we would've used for the parent frame.
1876 unsigned ParentFrameNumBytes = NumBytes;
1877 if (IsFunclet)
1878 NumBytes = getWinEHFuncletFrameSize(MF);
1879
1880 // Skip the callee-saved push instructions.
1881 bool PushedRegs = false;
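// StackOffset starts two slots below the CFA: the return address occupies
// the first slot and the first callee-saved push the second.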
1882 int StackOffset = 2 * stackGrowth;
1883 MachineBasicBlock::iterator LastCSPush = MBBI;
1884 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1885 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1886 return false;
1887 unsigned Opc = MBBI->getOpcode();
1888 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1889 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1890 };
1891
1892 while (IsCSPush(MBBI)) {
1893 PushedRegs = true;
1894 Register Reg = MBBI->getOperand(0).getReg();
1895 LastCSPush = MBBI;
1896 ++MBBI;
1897 unsigned Opc = LastCSPush->getOpcode();
1898
1899 if (!HasFP && NeedsDwarfCFI) {
1900 // Mark callee-saved push instruction.
1901 // Define the current CFA rule to use the provided offset.
1902 assert(StackSize);
1903 // Compared to push, push2 introduces more stack offset (one more
1904 // register).
1905 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1906 StackOffset += stackGrowth;
1907 BuildCFI(MBB, MBBI, DL,
1908 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1909 MachineInstr::FrameSetup);
1910 StackOffset += stackGrowth;
1911 }
1912
1913 if (NeedsWinCFI) {
1914 HasWinCFI = true;
1915 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1916 .addImm(Reg)
1917 .setMIFlag(MachineInstr::FrameSetup);
1918 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1919 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1920 .addImm(LastCSPush->getOperand(1).getReg())
1921 .setMIFlag(MachineInstr::FrameSetup);
1922 }
1923 }
1924
1925 // Realign stack after we pushed callee-saved registers (so that we'll be
1926 // able to calculate their offsets from the frame pointer).
1927 // Don't do this for Win64, it needs to realign the stack after the prologue.
1928 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1929 !ArgBaseReg.isValid()) {
1930 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1931 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1932
1933 if (NeedsWinCFI) {
1934 HasWinCFI = true;
1935 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1936 .addImm(MaxAlign)
1937 .setMIFlag(MachineInstr::FrameSetup);
1938 }
1939 }
1940
1941 // If there is a SUB32ri of ESP immediately before this instruction, merge
1942 // the two. This can be the case when tail call elimination is enabled and
1943 // the callee has more arguments than the caller.
1944 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1945
1946 // Adjust stack pointer: ESP -= numbytes.
1947
1948 // Windows and cygwin/mingw require a prologue helper routine when allocating
1949 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1950 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1951 // stack and adjust the stack pointer in one go. The 64-bit version of
1952 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1953 // responsible for adjusting the stack pointer. Touching the stack at 4K
1954 // increments is necessary to ensure that the guard pages used by the OS
1955 // virtual memory manager are allocated in correct sequence.
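// For instance, a 0x2000-byte Win64 frame is lowered to roughly:
//   movl $0x2000, %eax
//   callq __chkstk        ; probe only
//   subq %rax, %rsp       ; the prologue adjusts the stack pointer itself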
1956 uint64_t AlignedNumBytes = NumBytes;
1957 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1958 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1959 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1960 assert(!X86FI->getUsesRedZone() &&
1961 "The Red Zone is not accounted for in stack probes");
1962
1963 // Check whether EAX is livein for this block.
1964 bool isEAXAlive = isEAXLiveIn(MBB);
1965
1966 if (isEAXAlive) {
1967 if (Is64Bit) {
1968 // Save RAX
1969 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1970 .addReg(X86::RAX, RegState::Kill)
1971 .setMIFlag(MachineInstr::FrameSetup);
1972 } else {
1973 // Save EAX
1974 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1975 .addReg(X86::EAX, RegState::Kill)
1976 .setMIFlag(MachineInstr::FrameSetup);
1977 }
1978 }
1979
1980 if (Is64Bit) {
1981 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1982 // Function prologue is responsible for adjusting the stack pointer.
1983 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1984 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1985 .addImm(Alloc)
1986 .setMIFlag(MachineInstr::FrameSetup);
1987 } else {
1988 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1989 // We'll also use 4 already allocated bytes for EAX.
1990 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1991 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1992 .setMIFlag(MachineInstr::FrameSetup);
1993 }
1994
1995 // Call __chkstk, __chkstk_ms, or __alloca.
1996 emitStackProbe(MF, MBB, MBBI, DL, true);
1997
1998 if (isEAXAlive) {
1999 // Restore RAX/EAX
2000 MachineInstr *MI;
2001 if (Is64Bit)
2002 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2003 StackPtr, false, NumBytes - 8);
2004 else
2005 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2006 StackPtr, false, NumBytes - 4);
2007 MI->setFlag(MachineInstr::FrameSetup);
2008 MBB.insert(MBBI, MI);
2009 }
2010 } else if (NumBytes) {
2011 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2012 }
2013
2014 if (NeedsWinCFI && NumBytes) {
2015 HasWinCFI = true;
2016 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2017 .addImm(NumBytes)
2018 .setMIFlag(MachineInstr::FrameSetup);
2019 }
2020
2021 int SEHFrameOffset = 0;
2022 unsigned SPOrEstablisher;
2023 if (IsFunclet) {
2024 if (IsClrFunclet) {
2025 // The establisher parameter passed to a CLR funclet is actually a pointer
2026 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2027 // to find the root function establisher frame by loading the PSPSym from
2028 // the intermediate frame.
2029 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2030 MachinePointerInfo NoInfo;
2031 MBB.addLiveIn(Establisher);
2032 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2033 Establisher, false, PSPSlotOffset)
2034 .addMemOperand(MF.getMachineMemOperand(
2035 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2037 // Save the root establisher back into the current funclet's (mostly
2038 // empty) frame, in case a sub-funclet or the GC needs it.
2039 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2040 false, PSPSlotOffset)
2041 .addReg(Establisher)
2042 .addMemOperand(MF.getMachineMemOperand(
2043 NoInfo,
2044 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2045 SlotSize, Align(SlotSize)));
2046 }
2047 SPOrEstablisher = Establisher;
2048 } else {
2049 SPOrEstablisher = StackPtr;
2050 }
2051
2052 if (IsWin64Prologue && HasFP) {
2053 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2054 // this calculation on the incoming establisher, which holds the value of
2055 // RSP from the parent frame at the end of the prologue.
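// calculateSetFPREG keeps that offset a 16-byte-aligned value of at most
// 128 so it can be encoded by the UWOP_SET_FPREG unwind operation.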
2056 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2057 if (SEHFrameOffset)
2058 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2059 SPOrEstablisher, false, SEHFrameOffset);
2060 else
2061 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2062 .addReg(SPOrEstablisher);
2063
2064 // If this is not a funclet, emit the CFI describing our frame pointer.
2065 if (NeedsWinCFI && !IsFunclet) {
2066 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2067 HasWinCFI = true;
2068 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2069 .addImm(FramePtr)
2070 .addImm(SEHFrameOffset)
2071 .setMIFlag(MachineInstr::FrameSetup);
2072 if (isAsynchronousEHPersonality(Personality))
2073 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2074 }
2075 } else if (IsFunclet && STI.is32Bit()) {
2076 // Reset EBP / ESI to something good for funclets.
2077 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2078 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2079 // into the registration node so that the runtime will restore it for us.
2080 if (!MBB.isCleanupFuncletEntry()) {
2081 assert(Personality == EHPersonality::MSVC_CXX);
2082 Register FrameReg;
2083 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2084 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2085 // ESP is the first field, so no extra displacement is needed.
2086 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2087 false, EHRegOffset)
2088 .addReg(X86::ESP);
2089 }
2090 }
2091
2092 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2093 const MachineInstr &FrameInstr = *MBBI;
2094 ++MBBI;
2095
2096 if (NeedsWinCFI) {
2097 int FI;
2098 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2099 if (X86::FR64RegClass.contains(Reg)) {
2100 int Offset;
2101 Register IgnoredFrameReg;
2102 if (IsWin64Prologue && IsFunclet)
2103 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2104 else
2105 Offset =
2106 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2107 SEHFrameOffset;
2108
2109 HasWinCFI = true;
2110 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2111 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2112 .addImm(Reg)
2113 .addImm(Offset)
2114 .setMIFlag(MachineInstr::FrameSetup);
2115 }
2116 }
2117 }
2118 }
2119
2120 if (NeedsWinCFI && HasWinCFI)
2121 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2122 .setMIFlag(MachineInstr::FrameSetup);
2123
2124 if (FnHasClrFunclet && !IsFunclet) {
2125 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2126 // immediately after the prolog) into the PSPSlot so that funclets
2127 // and the GC can recover it.
2128 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2129 auto PSPInfo = MachinePointerInfo::getFixedStack(
2130 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2131 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2132 PSPSlotOffset)
2133 .addReg(StackPtr)
2134 .addMemOperand(MF.getMachineMemOperand(
2135 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2136 SlotSize, Align(SlotSize)));
2137 }
2138
2139 // Realign stack after we spilled callee-saved registers (so that we'll be
2140 // able to calculate their offsets from the frame pointer).
2141 // Win64 requires aligning the stack after the prologue.
2142 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2143 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2144 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2145 }
2146
2147 // We already dealt with stack realignment and funclets above.
2148 if (IsFunclet && STI.is32Bit())
2149 return;
2150
2151 // If we need a base pointer, set it up here. It's whatever the value
2152 // of the stack pointer is at this point. Any variable size objects
2153 // will be allocated after this, so we can still use the base pointer
2154 // to reference locals.
2155 if (TRI->hasBasePointer(MF)) {
2156 // Update the base pointer with the current stack pointer.
2157 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2158 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2159 .addReg(SPOrEstablisher)
2160 .setMIFlag(MachineInstr::FrameSetup);
2161 if (X86FI->getRestoreBasePointer()) {
2162 // Stash value of base pointer. Saving RSP instead of EBP shortens
2163 // dependence chain. Used by SjLj EH.
2164 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2165 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2166 X86FI->getRestoreBasePointer())
2167 .addReg(SPOrEstablisher)
2168 .setMIFlag(MachineInstr::FrameSetup);
2169 }
2170
2171 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2172 // Stash the value of the frame pointer relative to the base pointer for
2173 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2174 // it recovers the frame pointer from the base pointer rather than the
2175 // other way around.
2176 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2177 Register UsedReg;
2178 int Offset =
2179 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2180 .getFixed();
2181 assert(UsedReg == BasePtr);
2182 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2183 .addReg(FramePtr)
2184 .setMIFlag(MachineInstr::FrameSetup);
2185 }
2186 }
2187 if (ArgBaseReg.isValid()) {
2188 // Save argument base pointer.
2189 auto *MI = X86FI->getStackPtrSaveMI();
2190 int FI = MI->getOperand(1).getIndex();
2191 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2192 // movl %basereg, offset(%ebp)
2193 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2194 .addReg(ArgBaseReg)
2195 .setMIFlag(MachineInstr::FrameSetup);
2196 }
2197
2198 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2199 // Mark end of stack pointer adjustment.
2200 if (!HasFP && NumBytes) {
2201 // Define the current CFA rule to use the provided offset.
2202 assert(StackSize);
2203 BuildCFI(
2204 MBB, MBBI, DL,
2205 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2206 MachineInstr::FrameSetup);
2207 }
2208
2209 // Emit DWARF info specifying the offsets of the callee-saved registers.
2210 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2211 }
2212
2213 // X86 Interrupt handling function cannot assume anything about the direction
2214 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2215 // in each prologue of interrupt handler function.
2216 //
2217 // Create "cld" instruction only in these cases:
2218 // 1. The interrupt handling function uses any of the "rep" instructions.
2219 // 2. Interrupt handling function calls another function.
2220 // 3. If there are any inline asm blocks, as we do not know what they do
2221 //
2222 // TODO: We should also emit cld if we detect the use of std, but as of now,
2223 // the compiler does not even emit that instruction or even define it, so in
2224 // practice, this would only happen with inline asm, which we cover anyway.
2225 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
2226 bool NeedsCLD = false;
2227
2228 for (const MachineBasicBlock &B : MF) {
2229 for (const MachineInstr &MI : B) {
2230 if (MI.isCall()) {
2231 NeedsCLD = true;
2232 break;
2233 }
2234
2235 if (isOpcodeRep(MI.getOpcode())) {
2236 NeedsCLD = true;
2237 break;
2238 }
2239
2240 if (MI.isInlineAsm()) {
2241 // TODO: Parse asm for rep instructions or call sites?
2242 // For now, let's play it safe and emit a cld instruction
2243 // just in case.
2244 NeedsCLD = true;
2245 break;
2246 }
2247 }
2248 }
2249
2250 if (NeedsCLD) {
2251 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2252 .setMIFlag(MachineInstr::FrameSetup);
2253 }
2254 }
2255
2256 // At this point we know if the function has WinCFI or not.
2257 MF.setHasWinCFI(HasWinCFI);
2258}
2259
2260 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2261 const MachineFunction &MF) const {
2262 // We can't use LEA instructions for adjusting the stack pointer if we don't
2263 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2264 // to deallocate the stack.
2265 // This means that we can use LEA for SP in two situations:
2266 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2267 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2268 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2269}
2270
2271 static bool isFuncletReturnInstr(const MachineInstr &MI) {
2272 switch (MI.getOpcode()) {
2273 case X86::CATCHRET:
2274 case X86::CLEANUPRET:
2275 return true;
2276 default:
2277 return false;
2278 }
2279 llvm_unreachable("impossible");
2280}
2281
2282// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2283// stack. It holds a pointer to the bottom of the root function frame. The
2284// establisher frame pointer passed to a nested funclet may point to the
2285// (mostly empty) frame of its parent funclet, but it will need to find
2286// the frame of the root function to access locals. To facilitate this,
2287// every funclet copies the pointer to the bottom of the root function
2288// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2289// same offset for the PSPSym in the root function frame that's used in the
2290// funclets' frames allows each funclet to dynamically accept any ancestor
2291// frame as its establisher argument (the runtime doesn't guarantee the
2292// immediate parent for some reason lost to history), and also allows the GC,
2293// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2294// frame with only a single offset reported for the entire method.
2295unsigned
2296X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2297 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2298 Register SPReg;
2299 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2300 /*IgnoreSPUpdates*/ true)
2301 .getFixed();
2302 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2303 return static_cast<unsigned>(Offset);
2304}
2305
2306unsigned
2307X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2308 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2309 // This is the size of the pushed CSRs.
2310 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2311 // This is the size of callee saved XMMs.
2312 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2313 unsigned XMMSize =
2314 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2315 // This is the amount of stack a funclet needs to allocate.
2316 unsigned UsedSize;
2317 EHPersonality Personality =
2318 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2319 if (Personality == EHPersonality::CoreCLR) {
2320 // CLR funclets need to hold enough space to include the PSPSym, at the
2321 // same offset from the stack pointer (immediately after the prolog) as it
2322 // resides at in the main function.
2323 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2324 } else {
2325 // Other funclets just need enough stack for outgoing call arguments.
2326 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2327 }
2328 // RBP is not included in the callee saved register block. After pushing RBP,
2329 // everything is 16 byte aligned. Everything we allocate before an outgoing
2330 // call must also be 16 byte aligned.
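// For example, CSSize = 48 and UsedSize = 40 give alignTo(88, 16) = 96, so
// the funclet allocates 96 + XMMSize - 48 bytes below its pushed CSRs.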
2331 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2332 // Subtract out the size of the callee saved registers. This is how much stack
2333 // each funclet will allocate.
2334 return FrameSizeMinusRBP + XMMSize - CSSize;
2335}
2336
2337static bool isTailCallOpcode(unsigned Opc) {
2338 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2339 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2340 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2341}
2342
2343 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2344 MachineBasicBlock &MBB) const {
2345 const MachineFrameInfo &MFI = MF.getFrameInfo();
2346 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2347 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2348 MachineBasicBlock::iterator MBBI = Terminator;
2349 DebugLoc DL;
2350 if (MBBI != MBB.end())
2351 DL = MBBI->getDebugLoc();
2352 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2353 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2354 Register FramePtr = TRI->getFrameRegister(MF);
2355 Register MachineFramePtr =
2356 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2357
2358 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2359 bool NeedsWin64CFI =
2360 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2361 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2362
2363 // Get the number of bytes to allocate from the FrameInfo.
2364 uint64_t StackSize = MFI.getStackSize();
2365 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2366 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2367 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2368 bool HasFP = hasFP(MF);
2369 uint64_t NumBytes = 0;
2370
2371 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2372 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2373 MF.needsFrameMoves();
2374
2375 Register ArgBaseReg;
2376 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2377 unsigned Opc = X86::LEA32r;
2378 Register StackReg = X86::ESP;
2379 ArgBaseReg = MI->getOperand(0).getReg();
2380 if (STI.is64Bit()) {
2381 Opc = X86::LEA64r;
2382 StackReg = X86::RSP;
2383 }
2384 // leal -4(%basereg), %esp
2385 // .cfi_def_cfa %esp, 4
2386 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2387 .addUse(ArgBaseReg)
2388 .addImm(1)
2389 .addUse(X86::NoRegister)
2390 .addImm(-(int64_t)SlotSize)
2391 .addUse(X86::NoRegister)
2392 .setMIFlag(MachineInstr::FrameDestroy);
2393 if (NeedsDwarfCFI) {
2394 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2395 BuildCFI(MBB, MBBI, DL,
2396 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2397 MachineInstr::FrameDestroy);
2398 --MBBI;
2399 }
2400 --MBBI;
2401 }
2402
2403 if (IsFunclet) {
2404 assert(HasFP && "EH funclets without FP not yet implemented");
2405 NumBytes = getWinEHFuncletFrameSize(MF);
2406 } else if (HasFP) {
2407 // Calculate required stack adjustment.
2408 uint64_t FrameSize = StackSize - SlotSize;
2409 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2410
2411 // Callee-saved registers were pushed on stack before the stack was
2412 // realigned.
2413 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2414 NumBytes = alignTo(FrameSize, MaxAlign);
2415 } else {
2416 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2417 }
2418 uint64_t SEHStackAllocAmt = NumBytes;
2419
2420 // AfterPop is the position to insert .cfi_restore.
2421 MachineBasicBlock::iterator AfterPop = MBBI;
2422 if (HasFP) {
2423 if (X86FI->hasSwiftAsyncContext()) {
2424 // Discard the context.
2425 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2426 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2427 }
2428 // Pop EBP.
2429 BuildMI(MBB, MBBI, DL,
2430 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2431 MachineFramePtr)
2432 .setMIFlag(MachineInstr::FrameDestroy);
2433
2434 // We need to reset FP to its untagged state on return. Bit 60 is currently
2435 // used to show the presence of an extended frame.
2436 if (X86FI->hasSwiftAsyncContext()) {
2437 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2438 .addUse(MachineFramePtr)
2439 .addImm(60)
2440 .setMIFlag(MachineInstr::FrameDestroy);
2441 }
2442
2443 if (NeedsDwarfCFI) {
2444 if (!ArgBaseReg.isValid()) {
2445 unsigned DwarfStackPtr =
2446 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2447 BuildCFI(MBB, MBBI, DL,
2448 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2449 MachineInstr::FrameDestroy);
2450 }
2451 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2452 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2453 BuildCFI(MBB, AfterPop, DL,
2454 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2455 MachineInstr::FrameDestroy);
2456 --MBBI;
2457 --AfterPop;
2458 }
2459 --MBBI;
2460 }
2461 }
2462
2463 MachineBasicBlock::iterator FirstCSPop = MBBI;
2464 // Skip the callee-saved pop instructions.
2465 while (MBBI != MBB.begin()) {
2466 MachineBasicBlock::iterator PI = std::prev(MBBI);
2467 unsigned Opc = PI->getOpcode();
2468
2469 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2470 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2471 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2472 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2473 Opc != X86::POP2P && Opc != X86::LEA64r))
2474 break;
2475 FirstCSPop = PI;
2476 }
2477
2478 --MBBI;
2479 }
2480 if (ArgBaseReg.isValid()) {
2481 // Restore argument base pointer.
2482 auto *MI = X86FI->getStackPtrSaveMI();
2483 int FI = MI->getOperand(1).getIndex();
2484 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2485 // movl offset(%ebp), %basereg
2486 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2487 .setMIFlag(MachineInstr::FrameDestroy);
2488 }
2489 MBBI = FirstCSPop;
2490
2491 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2492 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2493
2494 if (MBBI != MBB.end())
2495 DL = MBBI->getDebugLoc();
2496 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2497 // instruction, merge the two instructions.
2498 if (NumBytes || MFI.hasVarSizedObjects())
2499 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2500
2501 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2502 // slot before popping them off! The same applies when the stack was
2503 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2504 // will not do realignment or dynamic stack allocation.
2505 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2506 !IsFunclet) {
2507 if (TRI->hasStackRealignment(MF))
2508 MBBI = FirstCSPop;
2509 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2510 uint64_t LEAAmount =
2511 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2512
2513 if (X86FI->hasSwiftAsyncContext())
2514 LEAAmount -= 16;
2515
2516 // There are only two legal forms of epilogue:
2517 // - add SEHAllocationSize, %rsp
2518 // - lea SEHAllocationSize(%FramePtr), %rsp
2519 //
2520 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2521 // However, we may use this sequence if we have a frame pointer because the
2522 // effects of the prologue can safely be undone.
2523 if (LEAAmount != 0) {
2524 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2525 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2526 false, LEAAmount);
2527 --MBBI;
2528 } else {
2529 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2530 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2531 --MBBI;
2532 }
2533 } else if (NumBytes) {
2534 // Adjust stack pointer back: ESP += numbytes.
2535 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2536 if (!HasFP && NeedsDwarfCFI) {
2537 // Define the current CFA rule to use the provided offset.
2538 BuildCFI(MBB, MBBI, DL,
2539 MCCFIInstruction::cfiDefCfaOffset(
2540 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2541 MachineInstr::FrameDestroy);
2542 }
2543 --MBBI;
2544 }
2545
2546 // Windows unwinder will not invoke function's exception handler if IP is
2547 // either in prologue or in epilogue. This behavior causes a problem when a
2548 // call immediately precedes an epilogue, because the return address points
2549 // into the epilogue. To cope with that, we insert an epilogue marker here,
2550 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2551 // final emitted code.
2552 if (NeedsWin64CFI && MF.hasWinCFI())
2553 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2554
2555 if (!HasFP && NeedsDwarfCFI) {
2556 MBBI = FirstCSPop;
2557 int64_t Offset = -(int64_t)CSSize - SlotSize;
2558 // Mark callee-saved pop instruction.
2559 // Define the current CFA rule to use the provided offset.
2560 while (MBBI != MBB.end()) {
2561 MachineBasicBlock::iterator PI = MBBI;
2562 unsigned Opc = PI->getOpcode();
2563 ++MBBI;
2564 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2565 Opc == X86::POP2 || Opc == X86::POP2P) {
2566 Offset += SlotSize;
2567 // Compared to pop, pop2 introduces more stack offset (one more
2568 // register).
2569 if (Opc == X86::POP2 || Opc == X86::POP2P)
2570 Offset += SlotSize;
2571 BuildCFI(MBB, MBBI, DL,
2572 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2573 MachineInstr::FrameDestroy);
2574 }
2575 }
2576 }
2577
2578 // Emit DWARF info specifying the restores of the callee-saved registers.
2579 // For epilogue with return inside or being other block without successor,
2580 // no need to generate .cfi_restore for callee-saved registers.
2581 if (NeedsDwarfCFI && !MBB.succ_empty())
2582 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2583
2584 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2585 // Add the return addr area delta back since we are not tail calling.
2586 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2587 assert(Offset >= 0 && "TCDelta should never be positive");
2588 if (Offset) {
2589 // Check for possible merge with preceding ADD instruction.
2590 Offset += mergeSPUpdates(MBB, Terminator, true);
2591 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2592 }
2593 }
2594
2595 // Emit tilerelease for AMX kernel.
2596 if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
2597 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2598}
2599
2600 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2601 int FI,
2602 Register &FrameReg) const {
2603 const MachineFrameInfo &MFI = MF.getFrameInfo();
2604
2605 bool IsFixed = MFI.isFixedObjectIndex(FI);
2606 // We can't calculate offset from frame pointer if the stack is realigned,
2607 // so enforce usage of stack/base pointer. The base pointer is used when we
2608 // have dynamic allocas in addition to dynamic realignment.
2609 if (TRI->hasBasePointer(MF))
2610 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2611 else if (TRI->hasStackRealignment(MF))
2612 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2613 else
2614 FrameReg = TRI->getFrameRegister(MF);
2615
2616 // Offset will hold the offset from the stack pointer at function entry to the
2617 // object.
2618 // We need to factor in additional offsets applied during the prologue to the
2619 // frame, base, and stack pointer depending on which is used.
2620 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2621 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2622 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2623 uint64_t StackSize = MFI.getStackSize();
2624 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2625 int64_t FPDelta = 0;
2626
2627 // In an x86 interrupt, remove the offset we added to account for the return
2628 // address from any stack object allocated in the caller's frame. Interrupts
2629 // do not have a standard return address. Fixed objects in the current frame,
2630 // such as SSE register spills, should not get this treatment.
2631 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2632 Offset >= 0) {
2633 Offset += getOffsetOfLocalArea();
2634 }
2635
2636 if (IsWin64Prologue) {
2637 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2638
2639 // Calculate required stack adjustment.
2640 uint64_t FrameSize = StackSize - SlotSize;
2641 // If required, include space for extra hidden slot for stashing base
2642 // pointer.
2643 if (X86FI->getRestoreBasePointer())
2644 FrameSize += SlotSize;
2645 uint64_t NumBytes = FrameSize - CSSize;
2646
2647 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2648 if (FI && FI == X86FI->getFAIndex())
2649 return StackOffset::getFixed(-SEHFrameOffset);
2650
2651 // FPDelta is the offset from the "traditional" FP location of the old base
2652 // pointer followed by return address and the location required by the
2653 // restricted Win64 prologue.
2654 // Add FPDelta to all offsets below that go through the frame pointer.
2655 FPDelta = FrameSize - SEHFrameOffset;
2656 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2657 "FPDelta isn't aligned per the Win64 ABI!");
2658 }
2659
2660 if (FrameReg == TRI->getFramePtr()) {
2661 // Skip saved EBP/RBP
2662 Offset += SlotSize;
2663
2664 // Account for restricted Windows prologue.
2665 Offset += FPDelta;
2666
2667 // Skip the RETADDR move area
2668 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2669 if (TailCallReturnAddrDelta < 0)
2670 Offset -= TailCallReturnAddrDelta;
2671
2672 return StackOffset::getFixed(Offset);
2673 }
2674
2675 // FrameReg is either the stack pointer or a base pointer. But the base is
2676 // located at the end of the statically known StackSize so the distinction
2677 // doesn't really matter.
2678 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2679 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2680 return StackOffset::getFixed(Offset + StackSize);
2681}
2682
2683 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2684 Register &FrameReg) const {
2685 const MachineFrameInfo &MFI = MF.getFrameInfo();
2686 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2687 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2688 const auto it = WinEHXMMSlotInfo.find(FI);
2689
2690 if (it == WinEHXMMSlotInfo.end())
2691 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2692
2693 FrameReg = TRI->getStackRegister();
2694 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2695 it->second;
2696}
2697
2698 StackOffset
2699 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2700 Register &FrameReg,
2701 int Adjustment) const {
2702 const MachineFrameInfo &MFI = MF.getFrameInfo();
2703 FrameReg = TRI->getStackRegister();
2704 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2705 getOffsetOfLocalArea() + Adjustment);
2706}
2707
2708 StackOffset
2709 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2710 int FI, Register &FrameReg,
2711 bool IgnoreSPUpdates) const {
2712
2713 const MachineFrameInfo &MFI = MF.getFrameInfo();
2714 // Does not include any dynamic realign.
2715 const uint64_t StackSize = MFI.getStackSize();
2716 // LLVM arranges the stack as follows:
2717 // ...
2718 // ARG2
2719 // ARG1
2720 // RETADDR
2721 // PUSH RBP <-- RBP points here
2722 // PUSH CSRs
2723 // ~~~~~~~ <-- possible stack realignment (non-win64)
2724 // ...
2725 // STACK OBJECTS
2726 // ... <-- RSP after prologue points here
2727 // ~~~~~~~ <-- possible stack realignment (win64)
2728 //
2729 // if (hasVarSizedObjects()):
2730 // ... <-- "base pointer" (ESI/RBX) points here
2731 // DYNAMIC ALLOCAS
2732 // ... <-- RSP points here
2733 //
2734 // Case 1: In the simple case of no stack realignment and no dynamic
2735 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2736 // with fixed offsets from RSP.
2737 //
2738 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2739 // stack objects are addressed with RBP and regular stack objects with RSP.
2740 //
2741 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2742 // to address stack arguments for outgoing calls and nothing else. The "base
2743 // pointer" points to local variables, and RBP points to fixed objects.
2744 //
2745 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2746 // answer we give is relative to the SP after the prologue, and not the
2747 // SP in the middle of the function.
2748
2749 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2750 !STI.isTargetWin64())
2751 return getFrameIndexReference(MF, FI, FrameReg);
2752
2753 // If !hasReservedCallFrame the function might have SP adjustment in the
2754 // body. So, even though the offset is statically known, it depends on where
2755 // we are in the function.
2756 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2757 return getFrameIndexReference(MF, FI, FrameReg);
2758
2759 // We don't handle tail calls, and shouldn't be seeing them either.
2760 assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2761 "we don't handle this case!");
2762
2763 // This is how the math works out:
2764 //
2765 // %rsp grows (i.e. gets lower) left to right. Each box below is
2766 // one word (eight bytes). Obj0 is the stack slot we're trying to
2767 // get to.
2768 //
2769 // ----------------------------------
2770 // | BP | Obj0 | Obj1 | ... | ObjN |
2771 // ----------------------------------
2772 // ^ ^ ^ ^
2773 // A B C E
2774 //
2775 // A is the incoming stack pointer.
2776 // (B - A) is the local area offset (-8 for x86-64) [1]
2777 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2778 //
2779 // |(E - B)| is the StackSize (absolute value, positive). For a
2780 // stack that grows down, this works out to be (B - E). [3]
2781 //
2782 // E is also the value of %rsp after stack has been set up, and we
2783 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2784 // (C - E) == (C - A) - (B - A) + (B - E)
2785 // { Using [1], [2] and [3] above }
2786 // == getObjectOffset - LocalAreaOffset + StackSize
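// Worked example with SlotSize = 8: LocalAreaOffset = -8, StackSize = 40
// and getObjectOffset(Obj0) = -24 give -24 - (-8) + 40 = 24, i.e. Obj0
// lives at 24(%rsp) once the prologue has run.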
2787
2788 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2789}
2790
2791 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2792 MachineFunction &MF, const TargetRegisterInfo *TRI,
2793 std::vector<CalleeSavedInfo> &CSI) const {
2794 MachineFrameInfo &MFI = MF.getFrameInfo();
2795 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2796
2797 unsigned CalleeSavedFrameSize = 0;
2798 unsigned XMMCalleeSavedFrameSize = 0;
2799 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2800 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2801
2802 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2803
2804 if (TailCallReturnAddrDelta < 0) {
2805 // create RETURNADDR area
2806 // arg
2807 // arg
2808 // RETADDR
2809 // { ...
2810 // RETADDR area
2811 // ...
2812 // }
2813 // [EBP]
2814 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2815 TailCallReturnAddrDelta - SlotSize, true);
2816 }
2817
2818 // Spill the BasePtr if it's used.
2819 if (this->TRI->hasBasePointer(MF)) {
2820 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2821 if (MF.hasEHFunclets()) {
2822 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2823 X86FI->setHasSEHFramePtrSave(true);
2824 X86FI->setSEHFramePtrSaveIndex(FI);
2825 }
2826 }
2827
2828 if (hasFP(MF)) {
2829 // emitPrologue always spills frame register the first thing.
2830 SpillSlotOffset -= SlotSize;
2831 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2832
2833 // The async context lives directly before the frame pointer, and we
2834 // allocate a second slot to preserve stack alignment.
2835 if (X86FI->hasSwiftAsyncContext()) {
2836 SpillSlotOffset -= SlotSize;
2837 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2838 SpillSlotOffset -= SlotSize;
2839 }
2840
2841 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2842 // the frame register, we can delete it from CSI list and not have to worry
2843 // about avoiding it later.
2844 Register FPReg = TRI->getFrameRegister(MF);
2845 for (unsigned i = 0; i < CSI.size(); ++i) {
2846 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2847 CSI.erase(CSI.begin() + i);
2848 break;
2849 }
2850 }
2851 }
2852
2853 // Strategy:
2854 // 1. Use push2 when
2855 // a) number of CSR > 1 if no need padding
2856 // b) number of CSR > 2 if need padding
2857 // 2. When the number of CSR push is odd
2858 // a. Start to use push2 from the 1st push if stack is 16B aligned.
2859 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2860 // 3. When the number of CSR push is even, start to use push2 from the 1st
2861 // push and make the stack 16B aligned before the push
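// For example, three eligible GPRs with a 16B-aligned first slot use one
// PUSH2 for the leading pair and a plain PUSH for the remaining register.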
2862 unsigned NumRegsForPush2 = 0;
2863 if (STI.hasPush2Pop2()) {
2864 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2865 return X86::GR64RegClass.contains(I.getReg());
2866 });
2867 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2868 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2869 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2870 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2871 if (X86FI->padForPush2Pop2()) {
2872 SpillSlotOffset -= SlotSize;
2873 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2874 }
2875 }
2876
2877 // Assign slots for GPRs. It increases frame size.
2878 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2879 Register Reg = I.getReg();
2880
2881 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2882 continue;
2883
2884 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2885 // or when there is an odd number of registers already in the candidates.
2886 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2887 (SpillSlotOffset % 16 == 0 ||
2888 X86FI->getNumCandidatesForPush2Pop2() % 2))
2889 X86FI->addCandidateForPush2Pop2(Reg);
2890
2891 SpillSlotOffset -= SlotSize;
2892 CalleeSavedFrameSize += SlotSize;
2893
2894 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2895 I.setFrameIdx(SlotIndex);
2896 }
2897
2898 // Adjust the offset of spill slot as we know the accurate callee saved frame
2899 // size.
2900 if (X86FI->getRestoreBasePointer()) {
2901 SpillSlotOffset -= SlotSize;
2902 CalleeSavedFrameSize += SlotSize;
2903
2904 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2905 // TODO: saving the slot index is better?
2906 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2907 }
2908 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2909 "Expect even candidates for push2/pop2");
2910 if (X86FI->getNumCandidatesForPush2Pop2())
2911 ++NumFunctionUsingPush2Pop2;
2912 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2913 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2914
2915 // Assign slots for XMMs.
2916 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2917 Register Reg = I.getReg();
2918 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2919 continue;
2920
2921 // If this is k-register make sure we lookup via the largest legal type.
2922 MVT VT = MVT::Other;
2923 if (X86::VK16RegClass.contains(Reg))
2924 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2925
2926 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2927 unsigned Size = TRI->getSpillSize(*RC);
2928 Align Alignment = TRI->getSpillAlign(*RC);
2929 // ensure alignment
2930 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2931 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2932
2933 // spill into slot
2934 SpillSlotOffset -= Size;
2935 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2936 I.setFrameIdx(SlotIndex);
2937 MFI.ensureMaxAlignment(Alignment);
2938
2939 // Save the start offset and size of XMM in stack frame for funclets.
2940 if (X86::VR128RegClass.contains(Reg)) {
2941 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2942 XMMCalleeSavedFrameSize += Size;
2943 }
2944 }
2945
2946 return true;
2947}
2948
2949 bool X86FrameLowering::spillCalleeSavedRegisters(
2950 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2951 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2952 DebugLoc DL = MBB.findDebugLoc(MI);
2953
2954 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2955 // for us, and there are no XMM CSRs on Win32.
2956 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2957 return true;
2958
2959 // Push GPRs. It increases frame size.
2960 const MachineFunction &MF = *MBB.getParent();
2961 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2962 if (X86FI->padForPush2Pop2())
2963 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
2964
2965 // Update LiveIn of the basic block and decide whether we can add a kill flag
2966 // to the use.
2967 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
2968 const MachineRegisterInfo &MRI = MF.getRegInfo();
2969 // Do not set a kill flag on values that are also marked as live-in. This
2970 // happens with the @llvm.returnaddress intrinsic and with arguments
2971 // passed in callee saved registers.
2972 // Omitting the kill flags is conservatively correct even if the live-in
2973 // is not used after all.
2974 if (MRI.isLiveIn(Reg))
2975 return false;
2976 MBB.addLiveIn(Reg);
2977 // Check whether any register aliasing Reg (sub- or super-register) is live-in.
2978 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
2979 if (MRI.isLiveIn(*AReg))
2980 return false;
2981 return true;
2982 };
2983 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
2984 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
2985 };
2986
2987 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
2988 Register Reg = RI->getReg();
2989 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2990 continue;
2991
2992 if (X86FI->isCandidateForPush2Pop2(Reg)) {
2993 Register Reg2 = (++RI)->getReg();
2994 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
2995 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
2996 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
2997 .setMIFlag(MachineInstr::FrameSetup);
2998 } else {
2999 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3000 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3001 .setMIFlag(MachineInstr::FrameSetup);
3002 }
3003 }
3004
3005 if (X86FI->getRestoreBasePointer()) {
3006 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3007 Register BaseReg = this->TRI->getBaseRegister();
3008 BuildMI(MBB, MI, DL, TII.get(Opc))
3009 .addReg(BaseReg, getKillRegState(true))
3010 .setMIFlag(MachineInstr::FrameSetup);
3011 }
3012
3013 // Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
3014 // so they are stored to their stack-frame slots instead.
3015 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3016 Register Reg = I.getReg();
3017 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3018 continue;
3019
3020 // If this is k-register make sure we lookup via the largest legal type.
3021 MVT VT = MVT::Other;
3022 if (X86::VK16RegClass.contains(Reg))
3023 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3024
3025 // Add the callee-saved register as live-in. It's killed at the spill.
3026 MBB.addLiveIn(Reg);
3027 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3028
3029 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3030 Register());
3031 --MI;
3032 MI->setFlag(MachineInstr::FrameSetup);
3033 ++MI;
3034 }
3035
3036 return true;
3037}
3038
3039void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3040 MachineBasicBlock::iterator MBBI,
3041 MachineInstr *CatchRet) const {
3042 // SEH shouldn't use catchret.
3043 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3044 MBB.getParent()->getFunction().getPersonalityFn())) &&
3045 "SEH should not use CATCHRET");
3046 const DebugLoc &DL = CatchRet->getDebugLoc();
3047 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3048
3049 // Fill EAX/RAX with the address of the target block.
3050 if (STI.is64Bit()) {
3051 // LEA64r CatchRetTarget(%rip), %rax
3052 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3053 .addReg(X86::RIP)
3054 .addImm(0)
3055 .addReg(0)
3056 .addMBB(CatchRetTarget)
3057 .addReg(0);
3058 } else {
3059 // MOV32ri $CatchRetTarget, %eax
3060 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3061 .addMBB(CatchRetTarget);
3062 }
3063
3064 // Record that we've taken the address of CatchRetTarget and no longer just
3065 // reference it in a terminator.
3066 CatchRetTarget->setMachineBlockAddressTaken();
3067}
3068
3069 bool X86FrameLowering::restoreCalleeSavedRegisters(
3070 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3071 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3072 if (CSI.empty())
3073 return false;
3074
3075 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3076 // Don't restore CSRs in 32-bit EH funclets. Matches
3077 // spillCalleeSavedRegisters.
3078 if (STI.is32Bit())
3079 return true;
3080 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3081 // funclets. emitEpilogue transforms these to normal jumps.
3082 if (MI->getOpcode() == X86::CATCHRET) {
3083 const Function &F = MBB.getParent()->getFunction();
3084 bool IsSEH = isAsynchronousEHPersonality(
3085 classifyEHPersonality(F.getPersonalityFn()));
3086 if (IsSEH)
3087 return true;
3088 }
3089 }
3090
3091 DebugLoc DL = MBB.findDebugLoc(MI);
3092
3093 // Reload XMMs from stack frame.
3094 for (const CalleeSavedInfo &I : CSI) {
3095 Register Reg = I.getReg();
3096 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3097 continue;
3098
3099 // If this is k-register make sure we lookup via the largest legal type.
3100 MVT VT = MVT::Other;
3101 if (X86::VK16RegClass.contains(Reg))
3102 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3103
3104 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3105 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3106 Register());
3107 }
3108
3109 // Clear the stack slot for spill base pointer register.
3110 MachineFunction &MF = *MBB.getParent();
3111 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3112 if (X86FI->getRestoreBasePointer()) {
3113 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3114 Register BaseReg = this->TRI->getBaseRegister();
3115 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3116 .setMIFlag(MachineInstr::FrameDestroy);
3117 }
3118
3119 // POP GPRs.
3120 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3121 Register Reg = I->getReg();
3122 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3123 continue;
3124
3125 if (X86FI->isCandidateForPush2Pop2(Reg))
3126 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3127 .addReg((++I)->getReg(), RegState::Define)
3128 .setMIFlag(MachineInstr::FrameDestroy);
3129 else
3130 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3131 .setMIFlag(MachineInstr::FrameDestroy);
3132 }
3133 if (X86FI->padForPush2Pop2())
3134 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3135
3136 return true;
3137}
3138
3139 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3140 BitVector &SavedRegs,
3141 RegScavenger *RS) const {
3142 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3143
3144 // Spill the BasePtr if it's used.
3145 if (TRI->hasBasePointer(MF)) {
3146 Register BasePtr = TRI->getBaseRegister();
3147 if (STI.isTarget64BitILP32())
3148 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3149 SavedRegs.set(BasePtr);
3150 }
3151}
3152
3153static bool HasNestArgument(const MachineFunction *MF) {
3154 const Function &F = MF->getFunction();
3155 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3156 I++) {
3157 if (I->hasNestAttr() && !I->use_empty())
3158 return true;
3159 }
3160 return false;
3161}
3162
3163/// GetScratchRegister - Get a temp register for performing work in the
3164/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3165/// and the properties of the function either one or two registers will be
3166/// needed. Set primary to true for the first register, false for the second.
3167static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3168 const MachineFunction &MF, bool Primary) {
3169 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3170
3171 // Erlang stuff.
3172 if (CallingConvention == CallingConv::HiPE) {
3173 if (Is64Bit)
3174 return Primary ? X86::R14 : X86::R13;
3175 else
3176 return Primary ? X86::EBX : X86::EDI;
3177 }
3178
3179 if (Is64Bit) {
3180 if (IsLP64)
3181 return Primary ? X86::R11 : X86::R12;
3182 else
3183 return Primary ? X86::R11D : X86::R12D;
3184 }
3185
3186 bool IsNested = HasNestArgument(&MF);
3187
3188 if (CallingConvention == CallingConv::X86_FastCall ||
3189 CallingConvention == CallingConv::Fast ||
3190 CallingConvention == CallingConv::Tail) {
3191 if (IsNested)
3192 report_fatal_error("Segmented stacks do not support fastcall with "
3193 "nested functions.");
3194 return Primary ? X86::EAX : X86::ECX;
3195 }
3196 if (IsNested)
3197 return Primary ? X86::EDX : X86::EAX;
3198 return Primary ? X86::ECX : X86::EAX;
3199}
3200
3201// The stack limit in the TCB is set to this many bytes above the actual stack
3202// limit.
3203 static const uint64_t kSplitStackAvailable = 256;
3204
3205 void X86FrameLowering::adjustForSegmentedStacks(
3206 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3207 MachineFrameInfo &MFI = MF.getFrameInfo();
3208 uint64_t StackSize;
3209 unsigned TlsReg, TlsOffset;
3210 DebugLoc DL;
3211
3212 // To support shrink-wrapping we would need to insert the new blocks
3213 // at the right place and update the branches to PrologueMBB.
3214 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3215
3216 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3217 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3218 "Scratch register is live-in");
3219
3220 if (MF.getFunction().isVarArg())
3221 report_fatal_error("Segmented stacks do not support vararg functions.");
3222 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3223 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3224 !STI.isTargetDragonFly())
3225 report_fatal_error("Segmented stacks not supported on this platform.");
3226
3227 // Eventually StackSize will be calculated by a link-time pass, which will
3228 // also decide whether checking code needs to be injected into this particular
3229 // prologue.
3230 StackSize = MFI.getStackSize();
3231
3232 if (!MFI.needsSplitStackProlog())
3233 return;
3234
3235 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3236 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3237 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3238 bool IsNested = false;
3239
3240 // We need to know if the function has a nest argument only in 64 bit mode.
3241 if (Is64Bit)
3242 IsNested = HasNestArgument(&MF);
3243
3244 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3245 // allocMBB needs to be last (terminating) instruction.
3246
3247 for (const auto &LI : PrologueMBB.liveins()) {
3248 allocMBB->addLiveIn(LI);
3249 checkMBB->addLiveIn(LI);
3250 }
3251
3252 if (IsNested)
3253 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3254
3255 MF.push_front(allocMBB);
3256 MF.push_front(checkMBB);
3257
3258 // When the frame size is less than 256 we just compare the stack
3259 // boundary directly to the value of the stack pointer, per gcc.
3260 bool CompareStackPointer = StackSize < kSplitStackAvailable;
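// e.g. on x86-64 Linux a small frame folds the whole check below into a
// single 'cmpq %fs:0x70, %rsp' against the stacklet limit.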
3261
3262 // Read the limit of the current stacklet from the stack_guard location.
3263 if (Is64Bit) {
3264 if (STI.isTargetLinux()) {
3265 TlsReg = X86::FS;
3266 TlsOffset = IsLP64 ? 0x70 : 0x40;
3267 } else if (STI.isTargetDarwin()) {
3268 TlsReg = X86::GS;
3269 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3270 } else if (STI.isTargetWin64()) {
3271 TlsReg = X86::GS;
3272 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3273 } else if (STI.isTargetFreeBSD()) {
3274 TlsReg = X86::FS;
3275 TlsOffset = 0x18;
3276 } else if (STI.isTargetDragonFly()) {
3277 TlsReg = X86::FS;
3278 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3279 } else {
3280 report_fatal_error("Segmented stacks not supported on this platform.");
3281 }
3282
3283 if (CompareStackPointer)
3284 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3285 else
3286 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3287 ScratchReg)
3288 .addReg(X86::RSP)
3289 .addImm(1)
3290 .addReg(0)
3291 .addImm(-StackSize)
3292 .addReg(0);
3293
3294 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3295 .addReg(ScratchReg)
3296 .addReg(0)
3297 .addImm(1)
3298 .addReg(0)
3299 .addImm(TlsOffset)
3300 .addReg(TlsReg);
3301 } else {
3302 if (STI.isTargetLinux()) {
3303 TlsReg = X86::GS;
3304 TlsOffset = 0x30;
3305 } else if (STI.isTargetDarwin()) {
3306 TlsReg = X86::GS;
3307 TlsOffset = 0x48 + 90 * 4;
3308 } else if (STI.isTargetWin32()) {
3309 TlsReg = X86::FS;
3310 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3311 } else if (STI.isTargetDragonFly()) {
3312 TlsReg = X86::FS;
3313 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3314 } else if (STI.isTargetFreeBSD()) {
3315 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3316 } else {
3317 report_fatal_error("Segmented stacks not supported on this platform.");
3318 }
3319
3320 if (CompareStackPointer)
3321 ScratchReg = X86::ESP;
3322 else
3323 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3324 .addReg(X86::ESP)
3325 .addImm(1)
3326 .addReg(0)
3327 .addImm(-StackSize)
3328 .addReg(0);
3329
3330 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3331 STI.isTargetDragonFly()) {
3332 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3333 .addReg(ScratchReg)
3334 .addReg(0)
3335 .addImm(0)
3336 .addReg(0)
3337 .addImm(TlsOffset)
3338 .addReg(TlsReg);
3339 } else if (STI.isTargetDarwin()) {
3340
3341 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3342 unsigned ScratchReg2;
3343 bool SaveScratch2;
3344 if (CompareStackPointer) {
3345 // The primary scratch register is available for holding the TLS offset.
3346 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3347 SaveScratch2 = false;
3348 } else {
3349 // Need to use a second register to hold the TLS offset
3350 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3351
3352 // Unfortunately, with fastcc the second scratch register may hold an
3353 // argument.
3354 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3355 }
3356
3357 // If Scratch2 is live-in then it needs to be saved.
3358 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3359 "Scratch register is live-in and not saved");
3360
3361 if (SaveScratch2)
3362 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3363 .addReg(ScratchReg2, RegState::Kill);
3364
3365 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3366 .addImm(TlsOffset);
3367 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3368 .addReg(ScratchReg)
3369 .addReg(ScratchReg2)
3370 .addImm(1)
3371 .addReg(0)
3372 .addImm(0)
3373 .addReg(TlsReg);
3374
3375 if (SaveScratch2)
3376 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3377 }
3378 }
3379
3380 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3381 // It jumps to normal execution of the function body.
3382 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3383 .addMBB(&PrologueMBB)
3384 .addImm(X86::COND_A);
3385
3386 // On 32 bit we first push the argument size and then the frame size. On 64
3387 // bit, we pass the stack frame size in r10 and the argument size in r11.
3388 if (Is64Bit) {
3389 // Functions with nested arguments use R10, so it needs to be saved across
3390 // the call to _morestack
3391
3392 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3393 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3394 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3395 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3396
3397 if (IsNested)
3398 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3399
3400 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3401 .addImm(StackSize);
3402 BuildMI(allocMBB, DL,
3403 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3404 Reg11)
3405 .addImm(X86FI->getArgumentStackSize());
3406 } else {
3407 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3408 .addImm(X86FI->getArgumentStackSize());
3409 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3410 }
3411
3412 // __morestack is in libgcc
3413 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3414 // Under the large code model, we cannot assume that __morestack lives
3415 // within 2^31 bytes of the call site, so we cannot use pc-relative
3416 // addressing. We cannot perform the call via a temporary register,
3417 // as the rax register may be used to store the static chain, and all
3418 // other suitable registers may be either callee-save or used for
3419 // parameter passing. We cannot use the stack at this point either
3420 // because __morestack manipulates the stack directly.
3421 //
3422 // To avoid these issues, perform an indirect call via a read-only memory
3423 // location containing the address.
3424 //
3425 // This solution is not perfect, as it assumes that the .rodata section
3426 // is laid out within 2^31 bytes of each function body, but this seems
3427 // to be sufficient for JIT.
3428 // FIXME: Add retpoline support and remove the error here.
3429 if (STI.useIndirectThunkCalls())
3430 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3431 "code model and thunks not yet implemented.");
3432 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3433 .addReg(X86::RIP)
3434 .addImm(0)
3435 .addReg(0)
3436 .addExternalSymbol("__morestack_addr")
3437 .addReg(0);
3438 } else {
3439 if (Is64Bit)
3440 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3441 .addExternalSymbol("__morestack");
3442 else
3443 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3444 .addExternalSymbol("__morestack");
3445 }
3446
3447 if (IsNested)
3448 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3449 else
3450 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3451
3452 allocMBB->addSuccessor(&PrologueMBB);
3453
3454 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3455 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3456
3457#ifdef EXPENSIVE_CHECKS
3458 MF.verify();
3459#endif
3460}
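
// Illustrative sketch (not literal emitted code): for a 32-bit Windows
// function that needs 0x1000 bytes, the blocks assembled above correspond
// roughly to:
//
//   checkMBB:
//     lea  ecx, [esp - 0x1000]       ; ScratchReg = SP - StackSize
//     cmp  ecx, dword ptr fs:[0x14]  ; stacklet limit from TLS (pvArbitrary)
//     jae  body                      ; SP >= limit + space needed: run body
//   allocMBB:
//     push <argument size>           ; 32-bit passes both sizes on the stack
//     push 0x1000                    ; frame size
//     call __morestack               ; allocates a new stacklet
//     ret
//   body:
//     ...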
3461
3462/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3463/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3464/// to fields it needs, through a named metadata node "hipe.literals" containing
3465/// name-value pairs.
3466static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3467 const StringRef LiteralName) {
3468 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3469 MDNode *Node = HiPELiteralsMD->getOperand(i);
3470 if (Node->getNumOperands() != 2)
3471 continue;
3472 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3473 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3474 if (!NodeName || !NodeVal)
3475 continue;
3476 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3477 if (ValConst && NodeName->getString() == LiteralName) {
3478 return ValConst->getZExtValue();
3479 }
3480 }
3481
3482 report_fatal_error("HiPE literal " + LiteralName +
3483 " required but not provided");
3484}
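
// For reference, the module-level metadata this helper consumes is shaped
// like the following (the literal names are the ones queried in this file;
// the values here are only placeholders):
//
//   !hipe.literals = !{!0, !1, !2}
//   !0 = !{!"P_NSP_LIMIT", i32 84}
//   !1 = !{!"X86_LEAF_WORDS", i32 24}
//   !2 = !{!"AMD64_LEAF_WORDS", i32 24}
//
// Operands that are not (MDString, ConstantInt) pairs are skipped by the
// loop above.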
3485
3486// Return true if there are no non-ehpad successors to MBB and there are no
3487// non-meta instructions between MBBI and MBB.end().
3488static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3489 MachineBasicBlock::const_iterator MBBI) {
3490 return llvm::all_of(
3491 MBB.successors(),
3492 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3493 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3494 return MI.isMetaInstruction();
3495 });
3496}
3497
3498/// Erlang programs may need a special prologue to handle the stack size they
3499/// might need at runtime. That is because Erlang/OTP does not implement a C
3500/// stack but uses a custom implementation of a hybrid stack/heap architecture.
3501/// (for more information see Eric Stenman's Ph.D. thesis:
3502/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3503///
3504/// CheckStack:
3505/// temp0 = sp - MaxStack
3506/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3507/// OldStart:
3508/// ...
3509/// IncStack:
3510/// call inc_stack # doubles the stack space
3511/// temp0 = sp - MaxStack
3512/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3513void X86FrameLowering::adjustForHiPEPrologue(
3514 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3515 MachineFrameInfo &MFI = MF.getFrameInfo();
3516 DebugLoc DL;
3517
3518 // To support shrink-wrapping we would need to insert the new blocks
3519 // at the right place and update the branches to PrologueMBB.
3520 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3521
3522 // HiPE-specific values
3523 NamedMDNode *HiPELiteralsMD =
3524 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3525 if (!HiPELiteralsMD)
3526 report_fatal_error(
3527 "Can't generate HiPE prologue without runtime parameters");
3528 const unsigned HipeLeafWords = getHiPELiteral(
3529 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3530 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3531 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3532 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3533 ? MF.getFunction().arg_size() - CCRegisteredArgs
3534 : 0;
3535 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3536
3537 assert(STI.isTargetLinux() &&
3538 "HiPE prologue is only supported on Linux operating systems.");
3539
3540 // Compute the largest caller's frame that is needed to fit the callees'
3541 // frames. This 'MaxStack' is computed from:
3542 //
3543 // a) the fixed frame size, which is the space needed for all spilled temps,
3544 // b) outgoing on-stack parameter areas, and
3545 // c) the minimum stack space this function needs to make available for the
3546 // functions it calls (a tunable ABI property).
3547 if (MFI.hasCalls()) {
3548 unsigned MoreStackForCalls = 0;
3549
3550 for (auto &MBB : MF) {
3551 for (auto &MI : MBB) {
3552 if (!MI.isCall())
3553 continue;
3554
3555 // Get callee operand.
3556 const MachineOperand &MO = MI.getOperand(0);
3557
3558 // Only take account of global function calls (no closures etc.).
3559 if (!MO.isGlobal())
3560 continue;
3561
3562 const Function *F = dyn_cast<Function>(MO.getGlobal());
3563 if (!F)
3564 continue;
3565
3566 // Do not update 'MaxStack' for primitive and built-in functions
3567 // (encoded with names either starting with "erlang."/"bif_" or not
3568 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3569 // "_", such as the BIF "suspend_0") as they are executed on another
3570 // stack.
3571 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3572 F->getName().find_first_of("._") == StringRef::npos)
3573 continue;
3574
3575 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3576 ? F->arg_size() - CCRegisteredArgs
3577 : 0;
3578 if (HipeLeafWords - 1 > CalleeStkArity)
3579 MoreStackForCalls =
3580 std::max(MoreStackForCalls,
3581 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3582 }
3583 }
3584 MaxStack += MoreStackForCalls;
3585 }
3586
3587 // If the stack frame needed is larger than the guaranteed amount, then
3588 // runtime checks and calls to the "inc_stack_0" BIF are inserted in the assembly prologue.
3589 if (MaxStack > Guaranteed) {
3590 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3591 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3592
3593 for (const auto &LI : PrologueMBB.liveins()) {
3594 stackCheckMBB->addLiveIn(LI);
3595 incStackMBB->addLiveIn(LI);
3596 }
3597
3598 MF.push_front(incStackMBB);
3599 MF.push_front(stackCheckMBB);
3600
3601 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3602 unsigned LEAop, CMPop, CALLop;
3603 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3604 if (Is64Bit) {
3605 SPReg = X86::RSP;
3606 PReg = X86::RBP;
3607 LEAop = X86::LEA64r;
3608 CMPop = X86::CMP64rm;
3609 CALLop = X86::CALL64pcrel32;
3610 } else {
3611 SPReg = X86::ESP;
3612 PReg = X86::EBP;
3613 LEAop = X86::LEA32r;
3614 CMPop = X86::CMP32rm;
3615 CALLop = X86::CALLpcrel32;
3616 }
3617
3618 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3619 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3620 "HiPE prologue scratch register is live-in");
3621
3622 // Create new MBB for StackCheck:
3623 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3624 false, -MaxStack);
3625 // SPLimitOffset is in a fixed heap location (pointed by BP).
3626 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3627 PReg, false, SPLimitOffset);
3628 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3629 .addMBB(&PrologueMBB)
3630 .addImm(X86::COND_AE);
3631
3632 // Create new MBB for IncStack:
3633 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3634 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3635 false, -MaxStack);
3636 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3637 PReg, false, SPLimitOffset);
3638 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3639 .addMBB(incStackMBB)
3640 .addImm(X86::COND_LE);
3641
3642 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3643 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3644 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3645 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3646 }
3647#ifdef EXPENSIVE_CHECKS
3648 MF.verify();
3649#endif
3650}
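
// Worked example with illustrative numbers: on x86-64, SlotSize == 8 and
// CCRegisteredArgs == 6. A function with a 40-byte frame and 8 arguments has
// CallerStkArity = 8 - 6 = 2, so
//   MaxStack = 40 + 2 * 8 + 8 = 64.
// If AMD64_LEAF_WORDS were 24, Guaranteed = 24 * 8 = 192 >= 64, and no
// stackCheckMBB/incStackMBB blocks would be inserted at all.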
3651
3652bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3653 MachineBasicBlock::iterator MBBI,
3654 const DebugLoc &DL,
3655 int Offset) const {
3656 if (Offset <= 0)
3657 return false;
3658
3659 if (Offset % SlotSize)
3660 return false;
3661
3662 int NumPops = Offset / SlotSize;
3663 // This is only worth it if we have at most 2 pops.
3664 if (NumPops != 1 && NumPops != 2)
3665 return false;
3666
3667 // Handle only the trivial case where the adjustment directly follows
3668 // a call. This is the most common one, anyway.
3669 if (MBBI == MBB.begin())
3670 return false;
3671 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3672 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3673 return false;
3674
3675 unsigned Regs[2];
3676 unsigned FoundRegs = 0;
3677
3678 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3679 const MachineOperand &RegMask = Prev->getOperand(1);
3680
3681 auto &RegClass =
3682 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3683 // Try to find up to NumPops free registers.
3684 for (auto Candidate : RegClass) {
3685 // Poor man's liveness:
3686 // Since we're immediately after a call, any register that is clobbered
3687 // by the call and not defined by it can be considered dead.
3688 if (!RegMask.clobbersPhysReg(Candidate))
3689 continue;
3690
3691 // Don't clobber reserved registers
3692 if (MRI.isReserved(Candidate))
3693 continue;
3694
3695 bool IsDef = false;
3696 for (const MachineOperand &MO : Prev->implicit_operands()) {
3697 if (MO.isReg() && MO.isDef() &&
3698 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3699 IsDef = true;
3700 break;
3701 }
3702 }
3703
3704 if (IsDef)
3705 continue;
3706
3707 Regs[FoundRegs++] = Candidate;
3708 if (FoundRegs == (unsigned)NumPops)
3709 break;
3710 }
3711
3712 if (FoundRegs == 0)
3713 return false;
3714
3715 // If we found only one free register, but need two, reuse the same one twice.
3716 while (FoundRegs < (unsigned)NumPops)
3717 Regs[FoundRegs++] = Regs[0];
3718
3719 for (int i = 0; i < NumPops; ++i)
3720 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3721 Regs[i]);
3722
3723 return true;
3724}
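
// Minimal sketch of the rewrite this enables at minsize: with SlotSize == 4,
// an 8-byte adjustment directly after a call, such as
//   call foo
//   add  esp, 8        ; 3 bytes
// becomes, when two call-clobbered registers (say ecx and edx) are free,
//   call foo
//   pop  ecx           ; 1 byte each; the loaded values are dead and the
//   pop  edx           ; pops exist only to advance ESP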
3725
3726MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3727 MachineFunction &MF, MachineBasicBlock &MBB,
3728 MachineBasicBlock::iterator I) const {
3729 bool reserveCallFrame = hasReservedCallFrame(MF);
3730 unsigned Opcode = I->getOpcode();
3731 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3732 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3733 uint64_t Amount = TII.getFrameSize(*I);
3734 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3735 I = MBB.erase(I);
3736 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3737
3738 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3739 // typically because the function is marked noreturn (abort, throw,
3740 // assert_fail, etc).
3741 if (isDestroy && blockEndIsUnreachable(MBB, I))
3742 return I;
3743
3744 if (!reserveCallFrame) {
3745 // If the stack pointer can be changed after prologue, turn the
3746 // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
3747 // adjcallstackup instruction into an 'add ESP, <amt>'
3748
3749 // We need to keep the stack aligned properly. To do this, we round the
3750 // amount of space needed for the outgoing arguments up to the next
3751 // alignment boundary.
3752 Amount = alignTo(Amount, getStackAlign());
3753
3754 const Function &F = MF.getFunction();
3755 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3756 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3757
3758 // If we have any exception handlers in this function, and we adjust
3759 // the SP before calls, we may need to indicate this to the unwinder
3760 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3761 // Amount == 0, because the preceding function may have set a non-0
3762 // GNU_ARGS_SIZE.
3763 // TODO: We don't need to reset this between subsequent functions,
3764 // if it didn't change.
3765 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3766
3767 if (HasDwarfEHHandlers && !isDestroy &&
3768 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3769 BuildCFI(MBB, InsertPos, DL,
3770 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3771
3772 if (Amount == 0)
3773 return I;
3774
3775 // Factor out the amount that gets handled inside the sequence
3776 // (pushes of arguments for frame setup, callee pops for frame destroy)
3777 Amount -= InternalAmt;
3778
3779 // TODO: This is needed only if we require precise CFA.
3780 // If this is a callee-pop calling convention, emit a CFA adjust for
3781 // the amount the callee popped.
3782 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3783 BuildCFI(MBB, InsertPos, DL,
3784 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3785
3786 // Add Amount to SP to destroy a frame, or subtract to setup.
3787 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3788
3789 if (StackAdjustment) {
3790 // Merge with any previous or following adjustment instruction. Note: the
3791 // instructions merged here do not have CFI, so their stack
3792 // adjustments do not feed into CfaAdjustment.
3793 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3794 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3795
3796 if (StackAdjustment) {
3797 if (!(F.hasMinSize() &&
3798 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3799 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3800 /*InEpilogue=*/false);
3801 }
3802 }
3803
3804 if (DwarfCFI && !hasFP(MF)) {
3805 // If we don't have FP, but need to generate unwind information,
3806 // we need to set the correct CFA offset after the stack adjustment.
3807 // How much we adjust the CFA offset depends on whether we're emitting
3808 // CFI only for EH purposes or for debugging. EH only requires the CFA
3809 // offset to be correct at each call site, while for debugging we want
3810 // it to be more precise.
3811
3812 int64_t CfaAdjustment = -StackAdjustment;
3813 // TODO: When not using precise CFA, we also need to adjust for the
3814 // InternalAmt here.
3815 if (CfaAdjustment) {
3816 BuildCFI(
3817 MBB, InsertPos, DL,
3818 MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3819 }
3820 }
3821
3822 return I;
3823 }
3824
3825 if (InternalAmt) {
3826 MachineBasicBlock::iterator CI = I;
3827 MachineBasicBlock::iterator B = MBB.begin();
3828 while (CI != B && !std::prev(CI)->isCall())
3829 --CI;
3830 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3831 }
3832
3833 return I;
3834}
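
// Minimal sketch of the non-reserved path above: pseudos of the form
//   ADJCALLSTACKDOWN 16
//   CALL ...
//   ADJCALLSTACKUP 16
// are lowered, after realigning Amount and subtracting InternalAmt (bytes
// already pushed as arguments or popped by the callee), morally to
//   sub esp, 16
//   call ...
//   add esp, 16
// plus .cfi_adjust_cfa_offset updates when DWARF CFI is required and no
// frame pointer is available.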
3835
3836bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3837 assert(MBB.getParent() && "Block is not attached to a function!");
3838 const MachineFunction &MF = *MBB.getParent();
3839 if (!MBB.isLiveIn(X86::EFLAGS))
3840 return true;
3841
3842 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3843 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3844 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3845 const X86TargetLowering &TLI = *STI.getTargetLowering();
3846 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3847 return false;
3848 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3848
3850 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3851}
3852
3853bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3854 assert(MBB.getParent() && "Block is not attached to a function!");
3855
3856 // Win64 has strict requirements in terms of epilogue and we are
3857 // not taking a chance at messing with them.
3858 // I.e., unless this block is already an exit block, we can't use
3859 // it as an epilogue.
3860 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3861 return false;
3862
3863 // Swift async context epilogue has a BTR instruction that clobbers parts of
3864 // EFLAGS.
3865 const MachineFunction &MF = *MBB.getParent();
3866 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3867 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3868
3869 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3870 return true;
3871
3872 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3873 // clobbers the EFLAGS. Check that we do not need to preserve it,
3874 // otherwise, conservatively assume this is not
3875 // safe to insert the epilogue here.
3876 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3877}
3878
3879bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3880 // If we may need to emit frameless compact unwind information, give
3881 // up as this is currently broken: PR25614.
3882 bool CompactUnwind =
3883 MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
3884 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3885 !CompactUnwind) &&
3886 // The lowering of segmented stack and HiPE only support entry
3887 // blocks as prologue blocks: PR26107. This limitation may be
3888 // lifted if we fix:
3889 // - adjustForSegmentedStacks
3890 // - adjustForHiPEPrologue
3891 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3892 !MF.shouldSplitStack();
3893}
3894
3895MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3896 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3897 const DebugLoc &DL, bool RestoreSP) const {
3898 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3899 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3900 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3901 "restoring EBP/ESI on non-32-bit target");
3902
3903 MachineFunction &MF = *MBB.getParent();
3904 Register FramePtr = TRI->getFrameRegister(MF);
3905 Register BasePtr = TRI->getBaseRegister();
3906 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3907 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3908 MachineFrameInfo &MFI = MF.getFrameInfo();
3909
3910 // FIXME: Don't set FrameSetup flag in catchret case.
3911
3912 int FI = FuncInfo.EHRegNodeFrameIndex;
3913 int EHRegSize = MFI.getObjectSize(FI);
3914
3915 if (RestoreSP) {
3916 // MOV32rm -EHRegSize(%ebp), %esp
3917 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3918 X86::EBP, true, -EHRegSize)
3919 .setMIFlag(MachineInstr::FrameSetup);
3920 }
3921
3922 Register UsedReg;
3923 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3924 int EndOffset = -EHRegOffset - EHRegSize;
3925 FuncInfo.EHRegNodeEndOffset = EndOffset;
3926
3927 if (UsedReg == FramePtr) {
3928 // ADD $offset, %ebp
3929 unsigned ADDri = getADDriOpcode(false);
3930 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3931 .addReg(FramePtr)
3932 .addImm(EndOffset)
3933 .setMIFlag(MachineInstr::FrameSetup)
3934 ->getOperand(3)
3935 .setIsDead();
3936 assert(EndOffset >= 0 &&
3937 "end of registration object above normal EBP position!");
3938 } else if (UsedReg == BasePtr) {
3939 // LEA offset(%ebp), %esi
3940 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3941 FramePtr, false, EndOffset)
3942 .setMIFlag(MachineInstr::FrameSetup);
3943 // MOV32rm SavedEBPOffset(%esi), %ebp
3944 assert(X86FI->getHasSEHFramePtrSave());
3945 int Offset =
3946 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3947 .getFixed();
3948 assert(UsedReg == BasePtr);
3949 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3950 UsedReg, true, Offset)
3951 .setMIFlag(MachineInstr::FrameSetup);
3952 } else {
3953 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3954 }
3955 return MBBI;
3956}
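
// Taken together, for the common case where UsedReg is the frame pointer and
// RestoreSP is set, the sequence built above is morally:
//   mov esp, [ebp - EHRegSize]   ; reload ESP saved next to the registration
//   add ebp, EndOffset           ; recover the parent EBP position
// In the base-pointer case, ESI is instead rebuilt with an LEA off EBP and
// EBP is then reloaded from the SEH frame-pointer save slot.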
3957
3958int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3959 return TRI->getSlotSize();
3960}
3961
3962Register
3963X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3964 return StackPtr;
3965}
3966
3967TargetFrameLowering::DwarfFrameBase
3968X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
3969 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3970 Register FrameRegister = RI->getFrameRegister(MF);
3971 if (getInitialCFARegister(MF) == FrameRegister &&
3972 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
3973 DwarfFrameBase FrameBase;
3974 FrameBase.Kind = DwarfFrameBase::CFA;
3975 FrameBase.Location.Offset =
3976 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
3977 return FrameBase;
3978 }
3979
3980 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
3981}
3982
3983namespace {
3984// Struct used by orderFrameObjects to help sort the stack objects.
3985struct X86FrameSortingObject {
3986 bool IsValid = false; // true if we care about this Object.
3987 unsigned ObjectIndex = 0; // Index of Object into MFI list.
3988 unsigned ObjectSize = 0; // Size of Object in bytes.
3989 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3990 unsigned ObjectNumUses = 0; // Object static number of uses.
3991};
3992
3993// The comparison function we use for llvm::stable_sort to order our local
3994// stack symbols. The current algorithm is to use an estimated
3995// "density". This takes into consideration the size and number of
3996// uses each object has in order to roughly minimize code size.
3997// So, for example, an object of size 16B that is referenced 5 times
3998// will get higher priority than 4 4B objects referenced 1 time each.
3999// It's not perfect and we may be able to squeeze a few more bytes out of
4000// it (for example: 0(esp) requires fewer bytes, symbols allocated at the
4001// fringe end can have special consideration, given their size is less
4002// important, etc.), but the algorithmic complexity grows too much to be
4003// worth the extra gains we get. This gets us pretty close.
4004// The final order leaves us with objects with highest priority going
4005// at the end of our list.
4006struct X86FrameSortingComparator {
4007 inline bool operator()(const X86FrameSortingObject &A,
4008 const X86FrameSortingObject &B) const {
4009 uint64_t DensityAScaled, DensityBScaled;
4010
4011 // For consistency in our comparison, all invalid objects are placed
4012 // at the end. This also allows us to stop walking when we hit the
4013 // first invalid item after it's all sorted.
4014 if (!A.IsValid)
4015 return false;
4016 if (!B.IsValid)
4017 return true;
4018
4019 // The density is calculated by doing :
4020 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4021 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4022 // Since this approach may cause inconsistencies in
4023 // the floating point <, >, == comparisons, depending on the floating
4024 // point model with which the compiler was built, we're going
4025 // to scale both sides by multiplying with
4026 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4027 // the division and, with it, the need for any floating point
4028 // arithmetic.
4029 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4030 static_cast<uint64_t>(B.ObjectSize);
4031 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4032 static_cast<uint64_t>(A.ObjectSize);
4033
4034 // If the two densities are equal, prioritize highest alignment
4035 // objects. This allows for similar alignment objects
4036 // to be packed together (given the same density).
4037 // There's room for improvement here, also, since we can pack
4038 // similar alignment (different density) objects next to each
4039 // other to save padding. This will also require further
4040 // complexity/iterations, and the overall gain isn't worth it,
4041 // in general. Something to keep in mind, though.
4042 if (DensityAScaled == DensityBScaled)
4043 return A.ObjectAlignment < B.ObjectAlignment;
4044
4045 return DensityAScaled < DensityBScaled;
4046 }
4047};
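
// Worked comparison: an object A of 16 bytes used 5 times against an object
// B of 4 bytes used once gives
//   DensityAScaled = 5 * 4  = 20
//   DensityBScaled = 1 * 16 = 16
// so B orders before A, and the denser A ends up nearer the end of the list,
// i.e. among the objects allocated closest to the stack pointer.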
4048} // namespace
4049
4050// Order the symbols in the local stack.
4051// We want to place the local stack objects in some sort of sensible order.
4052// The heuristic we use is to try and pack them according to static number
4053// of uses and size of object in order to minimize code size.
4054void X86FrameLowering::orderFrameObjects(
4055 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4056 const MachineFrameInfo &MFI = MF.getFrameInfo();
4057
4058 // Don't waste time if there's nothing to do.
4059 if (ObjectsToAllocate.empty())
4060 return;
4061
4062 // Create an array of all MFI objects. We won't need all of these
4063 // objects, but we're going to create a full array of them to make
4064 // it easier to index into when we're counting "uses" down below.
4065 // We want to be able to easily/cheaply access an object by simply
4066 // indexing into it, instead of having to search for it every time.
4067 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4068
4069 // Walk the objects we care about and mark them as such in our working
4070 // struct.
4071 for (auto &Obj : ObjectsToAllocate) {
4072 SortingObjects[Obj].IsValid = true;
4073 SortingObjects[Obj].ObjectIndex = Obj;
4074 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4075 // Set the size.
4076 int ObjectSize = MFI.getObjectSize(Obj);
4077 if (ObjectSize == 0)
4078 // Variable size. Just use 4.
4079 SortingObjects[Obj].ObjectSize = 4;
4080 else
4081 SortingObjects[Obj].ObjectSize = ObjectSize;
4082 }
4083
4084 // Count the number of uses for each object.
4085 for (auto &MBB : MF) {
4086 for (auto &MI : MBB) {
4087 if (MI.isDebugInstr())
4088 continue;
4089 for (const MachineOperand &MO : MI.operands()) {
4090 // Check to see if it's a local stack symbol.
4091 if (!MO.isFI())
4092 continue;
4093 int Index = MO.getIndex();
4094 // Check to see if it falls within our range, and is tagged
4095 // to require ordering.
4096 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4097 SortingObjects[Index].IsValid)
4098 SortingObjects[Index].ObjectNumUses++;
4099 }
4100 }
4101 }
4102
4103 // Sort the objects using X86FrameSortingComparator (see its comment for
4104 // info).
4105 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4106
4107 // Now modify the original list to represent the final order that
4108 // we want. The order will depend on whether we're going to access them
4109 // from the stack pointer or the frame pointer. For SP, the list should
4110 // end up with the objects we want at smaller offsets placed at the END.
4111 // For FP, it should be flipped.
4112 int i = 0;
4113 for (auto &Obj : SortingObjects) {
4114 // All invalid items are sorted at the end, so it's safe to stop.
4115 if (!Obj.IsValid)
4116 break;
4117 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4118 }
4119
4120 // Flip it if we're accessing off of the FP.
4121 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4122 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4123}
4124
4125unsigned
4126X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4127 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4128 unsigned Offset = 16;
4129 // RBP is immediately pushed.
4130 Offset += SlotSize;
4131 // All callee-saved registers are then pushed.
4132 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4133 // Every funclet allocates enough stack space for the largest outgoing call.
4134 Offset += getWinEHFuncletFrameSize(MF);
4135 return Offset;
4136}
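
// Worked example with illustrative sizes: with two callee-saved GPRs
// (16 bytes) and a 32-byte funclet frame, a funclet finds the parent frame
// pointer at 16 + 8 + 16 + 32 = 72 bytes above its own RSP.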
4137
4138void X86FrameLowering::processFunctionBeforeFrameFinalized(
4139 MachineFunction &MF, RegScavenger *RS) const {
4140 // Mark the function as not having WinCFI. We will set it back to true in
4141 // emitPrologue if it gets called and emits CFI.
4142 MF.setHasWinCFI(false);
4143
4144 MachineFrameInfo &MFI = MF.getFrameInfo();
4145 // If the frame is big enough that we might need to scavenge a register to
4146 // handle huge offsets, reserve a stack slot for that now.
4147 if (!isInt<32>(MFI.estimateStackSize(MF))) {
4148 int FI = MFI.CreateStackObject(SlotSize, Align(SlotSize), false);
4149 RS->addScavengingFrameIndex(FI);
4150 }
4151
4152 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4153 // aligned. The format doesn't support misaligned stack adjustments.
4154 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4155 MFI.ensureMaxAlignment(Align(SlotSize));
4156
4157 // If this function isn't doing Win64-style C++ EH, we don't need to do
4158 // anything.
4159 if (STI.is64Bit() && MF.hasEHFunclets() &&
4160 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4161 EHPersonality::MSVC_CXX) {
4162 adjustFrameForMsvcCxxEh(MF);
4163 }
4164}
4165
4166void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4167 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4168 // relative to RSP after the prologue. Find the offset of the last fixed
4169 // object, so that we can allocate a slot immediately following it. If there
4170 // were no fixed objects, use offset -SlotSize, which is immediately after the
4171 // return address. Fixed objects have negative frame indices.
4172 MachineFrameInfo &MFI = MF.getFrameInfo();
4173 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4174 int64_t MinFixedObjOffset = -SlotSize;
4175 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4176 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4177
4178 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4179 for (WinEHHandlerType &H : TBME.HandlerArray) {
4180 int FrameIndex = H.CatchObj.FrameIndex;
4181 if (FrameIndex != INT_MAX) {
4182 // Ensure alignment.
4183 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4184 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4185 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4186 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4187 }
4188 }
4189 }
4190
4191 // Ensure alignment.
4192 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4193 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4194 int UnwindHelpFI =
4195 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4196 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4197
4198 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4199 // other frame setup instructions.
4200 MachineBasicBlock &MBB = MF.front();
4201 auto MBBI = MBB.begin();
4202 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4203 ++MBBI;
4204
4205 DebugLoc DL = MBB.findDebugLoc(MBBI);
4206 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4207 UnwindHelpFI)
4208 .addImm(-2);
4209}
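
// Offset arithmetic sketch with illustrative numbers: if the lowest fixed
// object ends at offset -24 and one 8-byte, 8-byte-aligned catch object is
// placed, MinFixedObjOffset drops to -32, so UnwindHelp itself lands at
// -32 - SlotSize = -40 from the post-prologue RSP, and the store above
// writes -2 there on entry.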
4210
4211void X86FrameLowering::processFunctionAfterFrameFinalized(
4212 MachineFunction &MF, RegScavenger *RS) const {
4213 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4214
4215 if (STI.is32Bit() && MF.hasEHFunclets())
4216 restoreWinEHStackPointersInParent(MF);
4217 // We have emitted prolog and epilog. Don't need stack pointer saving
4218 // instruction any more.
4219 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4220 MI->eraseFromParent();
4221 X86FI->setStackPtrSaveMI(nullptr);
4222 }
4223}
4224
4225void X86FrameLowering::restoreWinEHStackPointersInParent(
4226 MachineFunction &MF) const {
4227 // 32-bit functions have to restore stack pointers when control is transferred
4228 // back to the parent function. These blocks are identified as eh pads that
4229 // are not funclet entries.
4230 bool IsSEH = isAsynchronousEHPersonality(
4231 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4232 for (MachineBasicBlock &MBB : MF) {
4233 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4234 if (NeedsRestore)
4235 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4236 /*RestoreSP=*/IsSEH);
4237 }
4238}
4239
4240// Compute the alignment gap between current SP after spilling FP/BP and the
4241// next properly aligned stack offset.
4242static int computeFPBPAlignmentGap(MachineFunction &MF,
4243 const TargetRegisterClass *RC,
4244 unsigned NumSpilledRegs) {
4245 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
4246 unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4247 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4248 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
4249 return AlignedSize - AllocSize;
4250}
4251
4252void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4253 MachineBasicBlock::iterator BeforeMI,
4254 Register FP, Register BP,
4255 int SPAdjust) const {
4256 assert(FP.isValid() || BP.isValid());
4257
4258 MachineBasicBlock *MBB = BeforeMI->getParent();
4259 DebugLoc DL = BeforeMI->getDebugLoc();
4260
4261 // Spill FP.
4262 if (FP.isValid()) {
4263 BuildMI(*MBB, BeforeMI, DL,
4264 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4265 .addReg(FP);
4266 }
4267
4268 // Spill BP.
4269 if (BP.isValid()) {
4270 BuildMI(*MBB, BeforeMI, DL,
4271 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4272 .addReg(BP);
4273 }
4274
4275 // Make sure SP is aligned.
4276 if (SPAdjust)
4277 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4278
4279 // Emit unwinding information.
4280 if (FP.isValid() && needsDwarfCFI(MF)) {
4281 // Emit .cfi_remember_state to remember old frame.
4282 unsigned CFIIndex =
4283 MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
4284 BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4285 .addCFIIndex(CFIIndex);
4286
4287 // Setup new CFA value with DW_CFA_def_cfa_expression:
4288 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4289 SmallString<64> CfaExpr;
4290 uint8_t buffer[16];
4291 int Offset = SPAdjust;
4292 if (BP.isValid())
4293 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4294 // If BeforeMI is a frame setup instruction, we need to adjust the position
4295 // and offset of the new cfi instruction.
4296 if (TII.isFrameSetup(*BeforeMI)) {
4297 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4298 BeforeMI = std::next(BeforeMI);
4299 }
4300 Register StackPtr = TRI->getStackRegister();
4301 if (STI.isTarget64BitILP32())
4302 StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
4303 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
4304 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4305 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4306 CfaExpr.push_back(dwarf::DW_OP_deref);
4307 CfaExpr.push_back(dwarf::DW_OP_consts);
4308 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4309 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
4310
4311 SmallString<64> DefCfaExpr;
4312 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4313 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4314 DefCfaExpr.append(CfaExpr.str());
4315 BuildCFI(*MBB, BeforeMI, DL,
4316 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4317 MachineInstr::FrameSetup);
4318 }
4319}
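
// Byte-level sketch of the escape built above, assuming DWARF register 7
// (RSP on x86-64) and Offset == 8: the expression payload is
//   DW_OP_breg7 8, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
// i.e. CFA = *(SP + 8) + SlotSize * 2, wrapped in DW_CFA_def_cfa_expression
// together with the LEB128-encoded payload length.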
4320
4321void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4322 MachineBasicBlock::iterator AfterMI,
4323 Register FP, Register BP,
4324 int SPAdjust) const {
4325 assert(FP.isValid() || BP.isValid());
4326
4327 // Adjust SP so it points to spilled FP or BP.
4328 MachineBasicBlock *MBB = AfterMI->getParent();
4329 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4330 DebugLoc DL = AfterMI->getDebugLoc();
4331 if (SPAdjust)
4332 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4333
4334 // Restore BP.
4335 if (BP.isValid()) {
4336 BuildMI(*MBB, Pos, DL,
4337 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4338 }
4339
4340 // Restore FP.
4341 if (FP.isValid()) {
4342 BuildMI(*MBB, Pos, DL,
4343 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
4344
4345 // Emit unwinding information.
4346 if (needsDwarfCFI(MF)) {
4347 // Restore original frame with .cfi_restore_state.
4348 unsigned CFIIndex =
4349 MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
4350 BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4351 .addCFIIndex(CFIIndex);
4352 }
4353 }
4354}
4355
4356void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4357 MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
4358 MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4359 assert(SpillFP || SpillBP);
4360
4361 Register FP, BP;
4362 const TargetRegisterClass *RC;
4363 unsigned NumRegs = 0;
4364
4365 if (SpillFP) {
4366 FP = TRI->getFrameRegister(MF);
4367 if (STI.isTarget64BitILP32())
4368 FP = Register(getX86SubSuperRegister(FP, 64));
4369 RC = TRI->getMinimalPhysRegClass(FP);
4370 ++NumRegs;
4371 }
4372 if (SpillBP) {
4373 BP = TRI->getBaseRegister();
4374 if (STI.isTarget64BitILP32())
4375 BP = Register(getX86SubSuperRegister(BP, 64));
4376 RC = TRI->getMinimalPhysRegClass(BP);
4377 ++NumRegs;
4378 }
4379 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4380
4381 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4382 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4383}
4384
4385bool X86FrameLowering::skipSpillFPBP(
4386 MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
4387 if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4388 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4389 // SaveRbx = COPY RBX
4390 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4391 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4392 // We should skip this instruction sequence.
4393 int FI;
4394 unsigned Reg;
4395 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4396 MI->getOperand(1).getReg() == X86::RBX) &&
4397 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4398 ++MI;
4399 return true;
4400 }
4401 return false;
4402}
4403
4404static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
4405 const TargetRegisterInfo *TRI, bool &AccessFP,
4406 bool &AccessBP) {
4407 AccessFP = AccessBP = false;
4408 if (FP) {
4409 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4410 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4411 AccessFP = true;
4412 }
4413 if (BP) {
4414 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4415 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4416 AccessBP = true;
4417 }
4418 return AccessFP || AccessBP;
4419}
4420
4421// An invoke instruction has been lowered to a normal function call. We try to
4422// figure out whether MI comes from an invoke.
4423// Is there a better way to do this?
4424static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4425 if (!MI.isCall())
4426 return false;
4427 if (InsideEHLabels)
4428 return true;
4429
4430 const MachineBasicBlock *MBB = MI.getParent();
4431 if (!MBB->hasEHPadSuccessor())
4432 return false;
4433
4434 // Check if there is another call instruction from MI to the end of MBB.
4435 MachineBasicBlock::const_iterator MBBI = MI, ME = MBB->end();
4436 for (++MBBI; MBBI != ME; ++MBBI)
4437 if (MBBI->isCall())
4438 return false;
4439 return true;
4440}
4441
4442/// If a function uses the base pointer and the base pointer is clobbered by
4443/// inline asm, RA doesn't detect this case, and after the inline asm the base
4444/// pointer contains a garbage value.
4445/// For example, if a 32-bit x86 function uses esi as the base pointer, and esi
4446/// is clobbered by the following inline asm
4447/// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4448/// we need to save esi before the asm and restore it after the asm.
4449///
4450/// The same problem can occur with the frame pointer if there is a function
4451/// call and the callee uses a different calling convention and clobbers the fp.
4452///
4453/// Because normal frame objects (spill slots) are accessed through the fp/bp
4454/// register, we can't spill fp/bp to normal spill slots.
4455///
4456/// FIXME: There are 2 possible enhancements:
4457/// 1. In many cases there are physical registers not clobbered by the inline
4458/// asm; we could use one of them as the base pointer, or use a virtual
4459/// register as the base pointer and let RA allocate a physical register to it.
4460/// 2. If no other instruction accesses the stack through fp/bp between the
4461/// inline asm and the epilogue, and there is no CFI requirement for a correct
4462/// fp, we can skip the save and restore operations.
4463void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
4464 Register FP, BP;
4465 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
4466 if (TFI.hasFP(MF))
4467 FP = TRI->getFrameRegister(MF);
4468 if (TRI->hasBasePointer(MF))
4469 BP = TRI->getBaseRegister();
4470
4471 // Currently only inline asm and function call can clobbers fp/bp. So we can
4472 // do some quick test and return early.
4473 if (!MF.hasInlineAsm()) {
4474 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4475 if (!X86FI->getFPClobberedByCall())
4476 FP = 0;
4477 if (!X86FI->getBPClobberedByCall())
4478 BP = 0;
4479 }
4480 if (!FP && !BP)
4481 return;
4482
4483 for (MachineBasicBlock &MBB : MF) {
4484 bool InsideEHLabels = false;
4485 auto MI = MBB.rbegin(), ME = MBB.rend();
4486 auto TermMI = MBB.getFirstTerminator();
4487 if (TermMI != MBB.begin())
4488 MI = *(std::prev(TermMI));
4489
4490 while (MI != ME) {
4491 // Skip frame setup/destroy instructions.
4492 // Skip Invoke (call inside try block) instructions.
4493 // Skip instructions handled by target.
4494 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4495 MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
4496 isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4497 ++MI;
4498 continue;
4499 }
4500
4501 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4502 InsideEHLabels = !InsideEHLabels;
4503 ++MI;
4504 continue;
4505 }
4506
4507 bool AccessFP, AccessBP;
4508 // Check if fp or bp is used in MI.
4509 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4510 ++MI;
4511 continue;
4512 }
4513
4514 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4515 // used.
4516 bool FPLive = false, BPLive = false;
4517 bool SpillFP = false, SpillBP = false;
4518 auto DefMI = MI, KillMI = MI;
4519 do {
4520 SpillFP |= AccessFP;
4521 SpillBP |= AccessBP;
4522
4523 // Maintain FPLive and BPLive.
4524 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4525 FPLive = false;
4526 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4527 FPLive = true;
4528 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4529 BPLive = false;
4530 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4531 BPLive = true;
4532
4533 DefMI = MI++;
4534 } while ((MI != ME) &&
4535 (FPLive || BPLive ||
4536 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4537
4538 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4539 if (FPLive && !SpillBP)
4540 continue;
4541
4542 // If the bp is clobbered by a call, we should save and restore outside of
4543 // the frame setup instructions.
4544 if (KillMI->isCall() && DefMI != ME) {
4546 auto FrameSetup = std::next(DefMI);
4547 // Look for frame setup instruction toward the start of the BB.
4548 // If we reach another call instruction, it means there is no frame setup
4549 // instruction for the current call instruction.
4550 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4551 !FrameSetup->isCall())
4552 ++FrameSetup;
4553 // If a frame setup instruction is found, we need to find out the
4554 // corresponding frame destroy instruction.
4555 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup)) {
4556 while (!TII.isFrameInstr(*KillMI))
4557 --KillMI;
4558 DefMI = FrameSetup;
4559 MI = DefMI;
4560 ++MI;
4561 }
4562 }
4563
4564 // Call target function to spill and restore FP and BP registers.
4565 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4566 }
4567 }
4568}
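
// End-to-end sketch of the rewrite performed here, for the 32-bit inline-asm
// example in the comment above where esi is the base pointer:
//   push esi          ; spillFPBPUsingSP, before the clobbering range
//   rep movsb         ; the inline asm that clobbers esi
//   pop  esi          ; restoreFPBPUsingSP, after the range
// with an extra SUB/ADD of ESP around the pair whenever the pushed bytes do
// not already satisfy the stack alignment (computeFPBPAlignmentGap).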
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isFuncletReturnInstr(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Module.h This file contains the declarations for the Module class.
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:157
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
reverse_iterator rbegin() const
Definition: ArrayRef.h:156
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:903
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1993
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:357
size_t arg_size() const
Definition: Function.h:899
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:680
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:232
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:759
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:565
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:670
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:633
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:558
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:600
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition: MCDwarf.h:653
OpType getOperation() const
Definition: MCDwarf.h:680
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:573
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:664
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:581
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition: MCDwarf.h:658
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1069
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:616
Machine Value Type.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
reverse_iterator rend()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
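These MachineInstrBuilder helpers chain off BuildMI (documented further below). A minimal sketch, assuming MBB, MBBI, DL, and TII in scope, building an illustrative mov $42, %eax tagged as frame setup:
  BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
      .addImm(42)
      .setMIFlag(MachineInstr::FrameSetup);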
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
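clobbersPhysReg is a static helper on MachineOperand; a minimal sketch scanning the regmask operand of a call instruction MI (assumed in scope) for a clobber of EBP:
  bool ClobbersEBP = false;
  for (const MachineOperand &MO : MI.operands())
    if (MO.isRegMask() &&
        MachineOperand::clobbersPhysReg(MO.getRegMask(), X86::EBP))
      ClobbersEBP = true; // EBP is not preserved across this call.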
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:262
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:562
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
A tuple of MDNodes.
Definition: Metadata.h:1730
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1388
unsigned getNumOperands() const
Definition: Metadata.cpp:1384
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
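A minimal sketch of the fixed-offset half of StackOffset, as the frame-index queries in this file use it:
  StackOffset Off = StackOffset::getFixed(-16); // 16 bytes below the base.
  int64_t Fixed = Off.getFixed();               // -16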
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static constexpr size_t npos
Definition: StringRef.h:52
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:624
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:558
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:450
Value * getValue() const
Definition: Metadata.h:490
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
void spillFPBP(MachineFunction &MF) const override
If a function uses base pointer and the base pointer is clobbered by inline asm, RA doesn't detect th...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value, i.e. the one valid at the beginning of the function (before any stack operations).
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
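A minimal sketch of the call as a prologue would issue it (MBB, MBBI, and DL assumed in scope; a negative byte count grows the stack downward):
  // Roughly: sub $64, %rsp, possibly split into several instructions.
  emitSPUpdate(MBB, MBBI, DL, /*NumBytes=*/-64, /*InEpilogue=*/false);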
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as an epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value, i.e. the one valid at the beginning of the function (before any stack operations).
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
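A minimal sketch of BuildCFI recording a new CFA offset after prologue pushes (the offset of 16 is illustrative):
  BuildCFI(MBB, MBBI, DL,
           MCCFIInstruction::cfiDefCfaOffset(/*L=*/nullptr, /*Offset=*/16),
           MachineInstr::FrameSetup);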
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const
Check the instruction before/after the passed instruction.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
Definition: X86InstrInfo.h:215
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" ...
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
bool isOSWindows() const
Definition: X86Subtarget.h:322
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:118
bool isTargetDragonFly() const
Definition: X86Subtarget.h:282
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:300
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:280
bool isTargetWin64() const
Definition: X86Subtarget.h:324
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:178
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP,...
Definition: X86Subtarget.h:386
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:304
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:337
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:281
bool isTargetNaCl64() const
Definition: X86Subtarget.h:296
bool isTargetWin32() const
Definition: X86Subtarget.h:326
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:218
bool isTargetLinux() const
Definition: X86Subtarget.h:290
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
CallingConvention
Definition: Dwarf.h:738
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:2020
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
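A minimal sketch of getX86SubSuperRegister; Size is in bits:
  MCRegister R64 = getX86SubSuperRegister(X86::EAX, 64);      // X86::RAX
  MCRegister R8H = getX86SubSuperRegister(X86::EAX, 8, true); // X86::AH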
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:555
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
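A minimal sketch of both addressing helpers wrapped around BuildMI (FI, MBB, MBBI, DL, and TII assumed in scope; opcodes are illustrative):
  // Load from a frame index: mov FI(...), %rax.
  addFrameReference(
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), X86::RAX), FI);
  // [Reg + Offset] form: lea -8(%rbp), %rsp.
  addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RSP),
               X86::RBP, /*isKill=*/false, -8);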
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
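A minimal sketch tying alignTo together with the alignDown and isAligned helpers documented above:
  uint64_t Up = alignTo(13, Align(8)); // 16
  uint64_t Down = alignDown(13, 8);    // 8
  bool Ok = isAligned(Align(8), Up);   // true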
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1928
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
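A minimal sketch of the count_if and find_if range wrappers over a callee-saved-info vector CSI (assumed in scope):
  unsigned NumGPR64 = count_if(CSI, [](const CalleeSavedInfo &I) {
    return X86::GR64RegClass.contains(I.getReg());
  });
  auto FramePtrCSI = find_if(CSI, [](const CalleeSavedInfo &I) {
    return I.getReg() == X86::RBP;
  });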
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
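A minimal sketch of both LEB128 encoders writing into a small buffer (SmallString plus raw_svector_ostream; the values are illustrative):
  SmallString<8> Buf;
  raw_svector_ostream OS(Buf);
  encodeULEB128(624485, OS); // Emits 0xE5 0x8E 0x26.
  encodeSLEB128(-123456, OS);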
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
union llvm::TargetFrameLowering::DwarfFrameBase::@246 Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76