LLVM 23.0.0git
X86FrameLowering.cpp
Go to the documentation of this file.
1//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86FrameLowering.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/Module.h"
33#include "llvm/MC/MCAsmInfo.h"
35#include "llvm/MC/MCSymbol.h"
36#include "llvm/Support/LEB128.h"
38#include <cstdlib>
39
40#define DEBUG_TYPE "x86-fl"
41
42STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
43STATISTIC(NumFrameExtraProbe,
44 "Number of extra stack probes generated in prologue");
45STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
46
47using namespace llvm;
48
49static const TargetRegisterClass *
51 const TargetRegisterInfo &TRI) {
52 if (X86::VK16RegClass.contains(Reg))
53 return STI.hasBWI() ? &X86::VK64RegClass : &X86::VK16RegClass;
54 return TRI.getMinimalPhysRegClass(Reg);
55}
56
58 MaybeAlign StackAlignOverride)
59 : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
60 STI.is64Bit() ? -8 : -4),
61 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
62 // Cache a bunch of frame-related predicates for this subtarget.
63 SlotSize = TRI->getSlotSize();
64 assert(SlotSize == 4 || SlotSize == 8);
65 Is64Bit = STI.is64Bit();
66 IsLP64 = STI.isTarget64BitLP64();
67 // standard x86_64 uses 64-bit frame/stack pointers, x32 - 32-bit.
68 Uses64BitFramePtr = STI.isTarget64BitLP64();
69 StackPtr = TRI->getStackRegister();
70}
71
73 return !MF.getFrameInfo().hasVarSizedObjects() &&
74 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
75 !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
76}
77
78/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
79/// call frame pseudos can be simplified. Having a FP, as in the default
80/// implementation, is not sufficient here since we can't always use it.
81/// Use a more nuanced condition.
///
/// Concretely, the result is true when any of the following holds: the call
/// frame is reserved, the function has a preallocated call, there is a frame
/// pointer and the stack is not realigned, or a base pointer is in use.
83 const MachineFunction &MF) const {
84 return hasReservedCallFrame(MF) ||
85 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
86 (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
87 TRI->hasBasePointer(MF);
88}
89
90// needsFrameIndexResolution - Do we need to perform FI resolution for
91// this function. Normally, this is required only when the function
92// has any stack objects. However, FI resolution actually has another job,
93// not apparent from the title - it resolves callframesetup/destroy
94// that were not simplified earlier.
95// So, this is required for x86 functions that have push sequences even
96// when there are no stack objects.
//
// Returns true if the function has stack objects or has push sequences.
98 const MachineFunction &MF) const {
99 return MF.getFrameInfo().hasStackObjects() ||
100 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
101}
102
103/// hasFPImpl - Return true if the specified function should have a dedicated
104/// frame pointer register. This is true if the function has variable sized
105/// allocas or if frame pointer elimination is disabled.
///
/// Among the conditions checked here, a frame pointer is also required when
/// the stack is realigned, when the function calls unwind-init or eh-return
/// or has EH funclets, when the frame has a stackmap or patchpoint, and for
/// Win64 prologues whose frame has a copy implying a stack adjustment.
107 const MachineFrameInfo &MFI = MF.getFrameInfo();
108 return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
109 TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
113 MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
114 MFI.hasStackMap() || MFI.hasPatchPoint() ||
115 (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
116}
117
118static unsigned getSUBriOpcode(bool IsLP64) {
119 return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
120}
121
122static unsigned getADDriOpcode(bool IsLP64) {
123 return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
124}
125
126static unsigned getSUBrrOpcode(bool IsLP64) {
127 return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
128}
129
130static unsigned getADDrrOpcode(bool IsLP64) {
131 return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
132}
133
134static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
135 return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
136}
137
138static unsigned getLEArOpcode(bool IsLP64) {
139 return IsLP64 ? X86::LEA64r : X86::LEA32r;
140}
141
142// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
143// value written by the PUSH from the stack. The processor tracks these marked
144// instructions internally and fast-forwards register data between matching PUSH
145// and POP instructions, without going through memory or through the training
146// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
147// memory-renaming optimization can be used.
148//
149// The PPX hint is purely a performance hint. Instructions with this hint have
150// the same functional semantics as those without. PPX hints set by the
151// compiler that violate the balancing rule may turn off the PPX optimization,
152// but they will not affect program semantics.
153//
154// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
155// are not considered).
156//
157// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
158// GPRs at a time to/from the stack.
159static unsigned getPUSHOpcode(const X86Subtarget &ST) {
160 return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
161 : X86::PUSH32r;
162}
163static unsigned getPOPOpcode(const X86Subtarget &ST) {
164 return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
165 : X86::POP32r;
166}
167static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
168 return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
169}
170static unsigned getPOP2Opcode(const X86Subtarget &ST) {
171 return ST.hasPPX() ? X86::POP2P : X86::POP2;
172}
173
175 for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
176 MCRegister Reg = RegMask.PhysReg;
177
178 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
179 Reg == X86::AH || Reg == X86::AL)
180 return true;
181 }
182
183 return false;
184}
185
186/// Check if the flags need to be preserved before the terminators.
187/// This would be the case, if the eflags is live-in of the region
188/// composed by the terminators or live-out of that region, without
189/// being defined by a terminator.
190static bool
192 for (const MachineInstr &MI : MBB.terminators()) {
193 bool BreakNext = false;
194 for (const MachineOperand &MO : MI.operands()) {
195 if (!MO.isReg())
196 continue;
197 Register Reg = MO.getReg();
198 if (Reg != X86::EFLAGS)
199 continue;
200
201 // This terminator needs an eflags that is not defined
202 // by a previous terminator:
203 // EFLAGS is live-in of the region composed by the terminators.
204 if (!MO.isDef())
205 return true;
206 // This terminator defines the eflags, i.e., we don't need to preserve it.
207 // However, we still need to check this specific terminator does not
208 // read a live-in value.
209 BreakNext = true;
210 }
211 // We found a definition of the eflags, no need to preserve them.
212 if (BreakNext)
213 return false;
214 }
215
216 // None of the terminators use or define the eflags.
217 // Check if they are live-out, that would imply we need to preserve them.
218 for (const MachineBasicBlock *Succ : MBB.successors())
219 if (Succ->isLiveIn(X86::EFLAGS))
220 return true;
221
222 return false;
223}
224
/// Upper bound (2^31 - 1 bytes) on the amount applied by one stack-pointer
/// adjustment instruction; larger updates are split or materialized in a
/// register.
constexpr uint64_t MaxSPChunk = (uint64_t{1} << 31) - 1;
226
227/// emitSPUpdate - Emit a series of instructions to increment / decrement the
228/// stack pointer by a constant value.
///
/// A negative NumBytes subtracts from the stack pointer (isSub); a positive
/// one adds to it. Outside the epilogue, when inline stack probing is enabled,
/// only a STACKALLOC_W_PROBING pseudo carrying the byte count is emitted.
/// Adjustments larger than MaxSPChunk are done through a scratch register
/// when one is available; everything else is emitted as a sequence of
/// immediate adjustments of at most MaxSPChunk bytes each.
231 const DebugLoc &DL, int64_t NumBytes,
232 bool InEpilogue) const {
233 bool isSub = NumBytes < 0;
234 uint64_t Offset = isSub ? -NumBytes : NumBytes;
237
239 // We're being asked to adjust a 32-bit stack pointer by 4 GiB or more.
240 // This might be unreachable code, so don't complain now; just trap if
241 // it's reached at runtime.
242 BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
243 return;
244 }
245
246 MachineFunction &MF = *MBB.getParent();
248 const X86TargetLowering &TLI = *STI.getTargetLowering();
249 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
250
251 // It's ok to not take into account large chunks when probing, as the
252 // allocation is split in smaller chunks anyway.
253 if (EmitInlineStackProbe && !InEpilogue) {
254
255 // This pseudo-instruction is going to be expanded, potentially using a
256 // loop, by inlineStackProbe().
257 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
258 return;
259 } else if (Offset > MaxSPChunk) {
260 // Rather than emit a long series of instructions for large offsets,
261 // load the offset into a register and do one sub/add
262 unsigned Reg = 0;
263 unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX);
264
// When subtracting and RAX/EAX is not live into the block, use it directly;
// otherwise look for a dead caller-saved register (Reg stays 0 if none).
265 if (isSub && !isEAXLiveIn(MBB))
266 Reg = Rax;
267 else
268 Reg = getX86SubSuperRegister(TRI->findDeadCallerSavedReg(MBB, MBBI),
269 Uses64BitFramePtr ? 64 : 32);
270
271 unsigned AddSubRROpc = isSub ? getSUBrrOpcode(Uses64BitFramePtr)
273 if (Reg) {
274 BuildMI(MBB, MBBI, DL,
276 .addImm(Offset)
277 .setMIFlag(Flag);
278 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
280 .addReg(Reg);
281 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
282 return;
283 } else if (Offset > 8 * MaxSPChunk) {
284 // If we would need more than 8 add or sub instructions (a >16GB stack
285 // frame), it's worth spilling RAX to materialize this immediate.
286 // pushq %rax
287 // movabsq +-$Offset+-SlotSize, %rax
288 // addq %rsp, %rax
289 // xchg %rax, (%rsp)
290 // movq (%rsp), %rsp
291 assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame");
292 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
294 .setMIFlag(Flag);
295 // Subtract is not commutative, so negate the offset and always use add.
296 // Subtract 8 less and add 8 more to account for the PUSH we just did.
297 if (isSub)
298 Offset = -(Offset - SlotSize);
299 else
301 BuildMI(MBB, MBBI, DL,
303 .addImm(Offset)
304 .setMIFlag(Flag);
305 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
306 .addReg(Rax)
308 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
309 // Exchange the new SP in RAX with the top of the stack.
311 BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
312 StackPtr, false, 0);
313 // Load new SP from the top of the stack into RSP.
314 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
315 StackPtr, false, 0);
316 return;
317 }
318 }
319
// Fallback: peel off at most MaxSPChunk bytes per iteration until the whole
// adjustment has been emitted.
320 while (Offset) {
321 if (Offset == SlotSize) {
322 // Use push / pop for slot sized adjustments as a size optimization. We
323 // need to find a dead register when using pop.
324 unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
325 : TRI->findDeadCallerSavedReg(MBB, MBBI);
326 if (Reg) {
327 unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
328 : (Is64Bit ? X86::POP64r : X86::POP32r);
329 BuildMI(MBB, MBBI, DL, TII.get(Opc))
330 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
331 .setMIFlag(Flag);
332 return;
333 }
334 }
335
336 uint64_t ThisVal = std::min(Offset, MaxSPChunk);
337
338 BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
339 .setMIFlag(Flag);
340
341 Offset -= ThisVal;
342 }
343}
344
/// Build one instruction that adds \p Offset to the stack pointer and return
/// its builder. \p Offset must be non-zero; a negative value subtracts from
/// the stack pointer. The instruction is an NF (no-flags) add/sub, an LEA, or
/// a plain add/sub whose EFLAGS def is marked dead, depending on whether LEA
/// is wanted at this point and on the subtarget.
345MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
347 const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
348 assert(Offset != 0 && "zero offset stack adjustment requested");
349
350 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
351 // is tricky.
352 bool UseLEA;
353 if (!InEpilogue) {
354 // Check if inserting the prologue at the beginning
355 // of MBB would require to use LEA operations.
356 // We need to use LEA operations if EFLAGS is live in, because
357 // it means an instruction will read it before it gets defined.
358 UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
359 } else {
360 // If we can use LEA for SP but we shouldn't, check that none
361 // of the terminators uses the eflags. Otherwise we will insert
362 // a ADD that will redefine the eflags and break the condition.
363 // Alternatively, we could move the ADD, but this may not be possible
364 // and is an optimization anyway.
365 UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
366 if (UseLEA && !STI.useLeaForSP())
368 // If that assert breaks, that means we do not do the right thing
369 // in canUseAsEpilogue.
371 "We shouldn't have allowed this insertion point");
372 }
373
374 MachineInstrBuilder MI;
375 // Use an NF (no-flags) variant as a smaller replacement for LEA when EFLAGS
376 // must be preserved (i.e. only when we would otherwise emit LEA). If EFLAGS
377 // is dead we prefer the plain SUB/ADD, which is shorter than the EVEX-encoded
378 // NF form. The NF stack-adjust opcodes below are 64-bit (SUB64ri32_NF/
379 // ADD64ri32_NF), so don't use them for the x32 ABI where the stack pointer is
380 // 32-bit. NF cannot reach a Win64 epilogue (which never uses LEA for the SP
381 // adjustment unless it has a frame pointer, and that path doesn't go through
382 // here), so the Windows epilogue unwinder never sees an undisassemblable NF
383 // add/sub.
384 bool UseNF = UseLEA && STI.hasNF() && Uses64BitFramePtr;
385 bool IsSub = Offset < 0;
// Add/sub forms take the magnitude as immediate; the LEA form below uses the
// signed Offset as its displacement instead.
386 uint64_t AbsOffset = IsSub ? -Offset : Offset;
387 if (UseNF) {
388 const unsigned Opc = IsSub ? X86::SUB64ri32_NF : X86::ADD64ri32_NF;
389 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
391 .addImm(AbsOffset);
392 // NF instructions define no EFLAGS, so there is nothing to mark dead.
393 } else if (UseLEA) {
396 StackPtr),
397 StackPtr, false, Offset);
398 } else {
399 const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
401 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
403 .addImm(AbsOffset);
404 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
405 }
406 return MI;
407}
408
/// Look for an ADD/SUB/LEA of the stack pointer next to \p MBBI: the
/// instruction before it when \p doMergeWithPrevious is set (stepping over
/// one CFI instruction), otherwise the instruction at \p MBBI. Every candidate
/// is reported through \p FoundStackAdjust(PI, Offset); the scan keeps moving
/// in the same direction while |CalcNewOffset(Offset)| is not below
/// MaxSPChunk. On success the matched instruction, and a CFI instruction
/// directly after it, are erased and CalcNewOffset(Offset) is returned. On any
/// failure nothing is erased and CalcNewOffset(0) is returned.
409template <typename FoundT, typename CalcT>
410int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
412 FoundT FoundStackAdjust,
413 CalcT CalcNewOffset,
414 bool doMergeWithPrevious) const {
// There is no neighbouring instruction to inspect in the requested direction.
415 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
416 (!doMergeWithPrevious && MBBI == MBB.end()))
417 return CalcNewOffset(0);
418
419 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
420
422 // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
423 // instruction, and that there are no DBG_VALUE or other instructions between
424 // ADD/SUB/LEA and its corresponding CFI instruction.
425 /* TODO: Add support for the case where there are multiple CFI instructions
426 below the ADD/SUB/LEA, e.g.:
427 ...
428 add
429 cfi_def_cfa_offset
430 cfi_offset
431 ...
432 */
433 if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
434 PI = std::prev(PI);
435
436 int64_t Offset = 0;
437 for (;;) {
438 unsigned Opc = PI->getOpcode();
439
// Decode the signed stack-pointer delta of the candidate: positive for ADD,
// the displacement for LEA, negated immediate for SUB.
440 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri ||
441 Opc == X86::ADD64ri32_NF) &&
442 PI->getOperand(0).getReg() == StackPtr) {
443 assert(PI->getOperand(1).getReg() == StackPtr);
444 Offset = PI->getOperand(2).getImm();
445 } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
446 PI->getOperand(0).getReg() == StackPtr &&
447 PI->getOperand(1).getReg() == StackPtr &&
448 PI->getOperand(2).getImm() == 1 &&
449 PI->getOperand(3).getReg() == X86::NoRegister &&
450 PI->getOperand(5).getReg() == X86::NoRegister) {
451 // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
452 Offset = PI->getOperand(4).getImm();
453 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri ||
454 Opc == X86::SUB64ri32_NF) &&
455 PI->getOperand(0).getReg() == StackPtr) {
456 assert(PI->getOperand(1).getReg() == StackPtr);
457 Offset = -PI->getOperand(2).getImm();
458 } else
459 return CalcNewOffset(0);
460
461 FoundStackAdjust(PI, Offset);
// Accept the candidate only if the combined offset stays below MaxSPChunk;
// otherwise keep scanning in the same direction.
462 if ((uint64_t)std::abs((int64_t)CalcNewOffset(Offset)) < MaxSPChunk)
463 break;
464
465 if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
466 return CalcNewOffset(0);
467
468 PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
469 }
470
471 PI = MBB.erase(PI);
472 if (PI != MBB.end() && PI->isCFIInstruction()) {
473 auto CIs = MBB.getParent()->getFrameInstructions();
474 MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
477 PI = MBB.erase(PI);
478 }
479 if (!doMergeWithPrevious)
481
482 return CalcNewOffset(Offset);
483}
484
487 int64_t AddOffset,
488 bool doMergeWithPrevious) const {
489 return mergeSPUpdates(
490 MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; },
491 doMergeWithPrevious);
492}
493
496 const DebugLoc &DL,
497 const MCCFIInstruction &CFIInst,
498 MachineInstr::MIFlag Flag) const {
499 MachineFunction &MF = *MBB.getParent();
500 unsigned CFIIndex = MF.addFrameInst(CFIInst);
501
503 MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
504
505 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
506 .addCFIIndex(CFIIndex)
507 .setMIFlag(Flag);
508}
509
510/// Emits Dwarf Info specifying offsets of callee saved registers and
511/// frame pointer. This is called only when basic block sections are enabled.
514 MachineFunction &MF = *MBB.getParent();
515 if (!hasFP(MF)) {
517 return;
518 }
519 const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
520 const Register FramePtr = TRI->getFrameRegister(MF);
521 const Register MachineFramePtr =
522 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
523 : FramePtr;
524 unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
525 // Offset = space for return address + size of the frame pointer itself.
526 int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
528 MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
530}
531
534 const DebugLoc &DL, bool IsPrologue) const {
535 MachineFunction &MF = *MBB.getParent();
536 MachineFrameInfo &MFI = MF.getFrameInfo();
537 const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
539
540 // Add callee saved registers to move list.
541 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
542
543 // Calculate offsets.
544 for (const CalleeSavedInfo &I : CSI) {
545 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
546 MCRegister Reg = I.getReg();
547 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
548
549 if (IsPrologue) {
550 if (X86FI->getStackPtrSaveMI()) {
551 // +2*SlotSize because there is return address and ebp at the bottom
552 // of the stack.
553 // | retaddr |
554 // | ebp |
555 // | |<--ebp
556 Offset += 2 * SlotSize;
557 SmallString<64> CfaExpr;
558 CfaExpr.push_back(dwarf::DW_CFA_expression);
559 uint8_t buffer[16];
560 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
561 CfaExpr.push_back(2);
562 Register FramePtr = TRI->getFrameRegister(MF);
563 const Register MachineFramePtr =
564 STI.isTarget64BitILP32()
566 : FramePtr;
567 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
568 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
569 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
571 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
573 } else {
575 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
576 }
577 } else {
579 MCCFIInstruction::createRestore(nullptr, DwarfReg));
580 }
581 }
582 if (auto *MI = X86FI->getStackPtrSaveMI()) {
583 int FI = MI->getOperand(1).getIndex();
584 int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
585 SmallString<64> CfaExpr;
586 Register FramePtr = TRI->getFrameRegister(MF);
587 const Register MachineFramePtr =
588 STI.isTarget64BitILP32()
590 : FramePtr;
591 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
592 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
593 uint8_t buffer[16];
594 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
595 CfaExpr.push_back(dwarf::DW_OP_deref);
596
597 SmallString<64> DefCfaExpr;
598 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
599 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
600 DefCfaExpr.append(CfaExpr.str());
601 // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
603 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
605 }
606}
607
608void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
609 MachineBasicBlock &MBB) const {
610 const MachineFunction &MF = *MBB.getParent();
611
612 // Insertion point.
613 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
614
615 // Fake a debug loc.
616 DebugLoc DL;
617 if (MBBI != MBB.end())
618 DL = MBBI->getDebugLoc();
619
620 // Zero out FP stack if referenced. Do this outside of the loop below so that
621 // it's done only once.
622 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
623 for (MCRegister Reg : RegsToZero.set_bits()) {
624 if (!X86::RFP80RegClass.contains(Reg))
625 continue;
626
627 unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
628 for (unsigned i = 0; i != NumFPRegs; ++i)
629 BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));
630
631 for (unsigned i = 0; i != NumFPRegs; ++i)
632 BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
633 break;
634 }
635
636 // For GPRs, we only care to clear out the 32-bit register.
637 BitVector GPRsToZero(TRI->getNumRegs());
638 for (MCRegister Reg : RegsToZero.set_bits())
639 if (TRI->isGeneralPurposeRegister(MF, Reg)) {
640 GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
641 RegsToZero.reset(Reg);
642 }
643
644 // Zero out the GPRs first.
645 for (MCRegister Reg : GPRsToZero.set_bits())
646 TII.buildClearRegister(Reg, MBB, MBBI, DL);
647
648 // Zero out the remaining registers.
649 for (MCRegister Reg : RegsToZero.set_bits())
650 TII.buildClearRegister(Reg, MBB, MBBI, DL);
651}
652
655 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
656 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
658 if (STI.isTargetWindowsCoreCLR()) {
659 if (InProlog) {
660 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
661 .addImm(0 /* no explicit stack size */);
662 } else {
663 emitStackProbeInline(MF, MBB, MBBI, DL, false);
664 }
665 } else {
666 emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
667 }
668}
669
671 return STI.isOSWindows() && !STI.isTargetWin64();
672}
673
675 MachineBasicBlock &PrologMBB) const {
676 auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
677 return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
678 });
679 if (Where != PrologMBB.end()) {
680 DebugLoc DL = PrologMBB.findDebugLoc(Where);
681 emitStackProbeInline(MF, PrologMBB, Where, DL, true);
682 Where->eraseFromParent();
683 }
684}
685
/// Dispatch the inline stack-probe expansion: the dedicated CoreCLR expansion
/// is used for 64-bit Windows CoreCLR targets, the generic one otherwise.
686void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
689 const DebugLoc &DL,
690 bool InProlog) const {
692 if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
693 emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
694 else
695 emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
696}
697
/// Expand the probing allocation pseudo at \p MBBI, whose first operand holds
/// the number of bytes to allocate. Allocations larger than eight times the
/// stack probe size are expanded as a loop; smaller ones are unrolled.
698void X86FrameLowering::emitStackProbeInlineGeneric(
700 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
701 MachineInstr &AllocWithProbe = *MBBI;
702 uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
703
706 assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
707 "different expansion expected for CoreCLR 64 bit");
708
709 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
// Threshold between the unrolled and the loop expansion.
710 uint64_t ProbeChunk = StackProbeSize * 8;
711
// Zero when the stack is not realigned, so the align offset passed below is
// zero as well in that case.
712 uint64_t MaxAlign =
713 TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
714
715 // Synthesize a loop or unroll it, depending on the number of iterations.
716 // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
717 // between the unaligned rsp and current rsp.
718 if (Offset > ProbeChunk) {
719 emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
720 MaxAlign % StackProbeSize);
721 } else {
722 emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
723 MaxAlign % StackProbeSize);
724 }
725}
726
/// Expand a probed allocation of Offset bytes as straight-line code.
///
/// When Offset + AlignOffset exceeds the probe size, a first chunk of
/// StackProbeSize - AlignOffset bytes is allocated and probed by storing 0 at
/// the stack pointer. Further chunks of StackProbeSize bytes are allocated and
/// probed while more than one probe size remains. The remaining tail is
/// allocated without a probe, using a push when it is exactly one slot.
/// \p AlignOffset must be smaller than the probe size.
727void X86FrameLowering::emitStackProbeInlineGenericBlock(
730 uint64_t AlignOffset) const {
731
732 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
733 const bool HasFP = hasFP(MF);
734 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
735 const X86TargetLowering &TLI = *STI.getTargetLowering();
736 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
737 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
738
// Number of bytes allocated so far.
739 uint64_t CurrentOffset = 0;
740
741 assert(AlignOffset < StackProbeSize);
742
743 // If the offset is so small it fits within a page, there's nothing to do.
744 if (StackProbeSize < Offset + AlignOffset) {
745
746 uint64_t StackAdjustment = StackProbeSize - AlignOffset;
747 BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
748 .setMIFlag(MachineInstr::FrameSetup);
// Without a frame pointer, each adjustment is mirrored in the CFA offset.
749 if (!HasFP && NeedsDwarfCFI) {
750 BuildCFI(
751 MBB, MBBI, DL,
752 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
753 }
754
755 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
757 StackPtr, false, 0)
758 .addImm(0)
760 NumFrameExtraProbe++;
761 CurrentOffset = StackProbeSize - AlignOffset;
762 }
763
764 // For the next N - 1 pages, just probe. I tried to take advantage of
765 // natural probes but it implies much more logic and there were very few
766 // interesting natural probes to interleave.
767 while (CurrentOffset + StackProbeSize < Offset) {
768 BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
769 .setMIFlag(MachineInstr::FrameSetup);
770
771 if (!HasFP && NeedsDwarfCFI) {
772 BuildCFI(
773 MBB, MBBI, DL,
774 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
775 }
776 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
778 StackPtr, false, 0)
779 .addImm(0)
781 NumFrameExtraProbe++;
782 CurrentOffset += StackProbeSize;
783 }
784
785 // No need to probe the tail, it is smaller than a Page.
786 uint64_t ChunkSize = Offset - CurrentOffset;
787 if (ChunkSize == SlotSize) {
788 // Use push for slot sized adjustments as a size optimization,
789 // like emitSPUpdate does when not probing.
790 unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
791 unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
792 BuildMI(MBB, MBBI, DL, TII.get(Opc))
795 } else {
796 BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
797 .setMIFlag(MachineInstr::FrameSetup);
798 }
799 // No need to adjust Dwarf CFA offset here, the last position of the stack has
800 // been defined
801}
802
/// Expand a probed allocation of Offset bytes (non-zero) as a loop.
///
/// An optional first allocation of \p AlignOffset bytes is probed on its own.
/// The loop bound, the stack pointer minus Offset rounded down to the probe
/// size, is computed into a scratch register (R11, R11D or EAX). A new block
/// testMBB then allocates one probe-size chunk, stores 0 at the stack pointer,
/// compares against the bound and branches back to itself. The rest of the
/// original block is moved to tailMBB, which begins by allocating the
/// remaining Offset % StackProbeSize bytes.
803void X86FrameLowering::emitStackProbeInlineGenericLoop(
806 uint64_t AlignOffset) const {
807 assert(Offset && "null offset");
808
809 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
811 "Inline stack probe loop will clobber live EFLAGS.");
812
813 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
814 const bool HasFP = hasFP(MF);
815 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
816 const X86TargetLowering &TLI = *STI.getTargetLowering();
817 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
818 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
819
820 if (AlignOffset) {
821 if (AlignOffset < StackProbeSize) {
822 // Perform a first smaller allocation followed by a probe.
823 BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
824 .setMIFlag(MachineInstr::FrameSetup);
825
826 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
828 StackPtr, false, 0)
829 .addImm(0)
831 NumFrameExtraProbe++;
832 Offset -= AlignOffset;
833 }
834 }
835
836 // Synthesize a loop
837 NumFrameLoopProbe++;
838 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
839
840 MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
841 MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
842
844 MF.insert(MBBIter, testMBB);
845 MF.insert(MBBIter, tailMBB);
846
// Scratch register that holds the loop bound.
847 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
848 : Is64Bit ? X86::R11D
849 : X86::EAX;
850
851 // save loop bound
852 {
853 const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);
854
855 // Can we calculate the loop bound using SUB with a 32-bit immediate?
856 // Note that the immediate gets sign-extended when used with a 64-bit
857 // register, so in that case we only have 31 bits to work with.
858 bool canUseSub =
859 Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);
860
861 if (canUseSub) {
862 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
863
864 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
867 BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
868 .addReg(FinalStackProbed)
869 .addImm(BoundOffset)
871 } else if (Uses64BitFramePtr) {
// The bound does not fit a SUB immediate: materialize -BoundOffset with a
// 64-bit move and combine it with an add instead.
872 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
873 .addImm(-BoundOffset)
875 BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
876 .addReg(FinalStackProbed)
879 } else {
880 llvm_unreachable("Offset too large for 32-bit stack pointer");
881 }
882
883 // while in the loop, use loop-invariant reg for CFI,
884 // instead of the stack pointer, which changes during the loop
885 if (!HasFP && NeedsDwarfCFI) {
886 // x32 uses the same DWARF register numbers as x86-64,
887 // so there isn't a register number for r11d, we must use r11 instead
888 const Register DwarfFinalStackProbed =
889 STI.isTarget64BitILP32()
890 ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
891 : FinalStackProbed;
892
895 nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
897 MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
898 }
899 }
900
901 // allocate a page
902 BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
903 /*InEpilogue=*/false)
904 .setMIFlag(MachineInstr::FrameSetup);
905
906 // touch the page
907 addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
909 StackPtr, false, 0)
910 .addImm(0)
912
913 // cmp with stack pointer bound
914 BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
916 .addReg(FinalStackProbed)
918
919 // jump
920 BuildMI(testMBB, DL, TII.get(X86::JCC_1))
921 .addMBB(testMBB)
// testMBB either loops back to itself or falls out to tailMBB.
924 testMBB->addSuccessor(testMBB);
925 testMBB->addSuccessor(tailMBB);
926
927 // BB management
// Everything from MBBI to the end of MBB now lives in tailMBB.
928 tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
930 MBB.addSuccessor(testMBB);
931
932 // handle tail
933 const uint64_t TailOffset = Offset % StackProbeSize;
934 MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
935 if (TailOffset) {
936 BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
937 /*InEpilogue=*/false)
938 .setMIFlag(MachineInstr::FrameSetup);
939 }
940
941 // after the loop, switch back to stack pointer for CFI
942 if (!HasFP && NeedsDwarfCFI) {
943 // x32 uses the same DWARF register numbers as x86-64,
944 // so there isn't a register number for esp, we must use rsp instead
945 const Register DwarfStackPtr =
946 STI.isTarget64BitILP32()
949
950 BuildCFI(*tailMBB, TailMBBIter, DL,
952 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
953 }
954
955 // Update Live In information
956 fullyRecomputeLiveIns({tailMBB, testMBB});
957}
958
/// Expand an inline stack probe for 64-bit Windows CoreCLR targets (both
/// properties are asserted below).
///
/// On entry RAX holds the number of bytes to allocate. The block containing
/// \p MBBI is split: everything from \p MBBI onwards moves to a new
/// ContinueMBB, and two further blocks (RoundMBB, LoopMBB) are created to
/// touch the pages between the thread's current stack limit and the new
/// stack pointer. RSP itself is only adjusted in ContinueMBB, after probing.
///
/// \param MBBI     Insertion point; instructions from here on are moved to
///                 ContinueMBB.
/// \param DL       Debug location attached to every emitted instruction.
/// \param InProlog When true, physical registers RAX/RCX/RDX are used instead
///                 of virtual registers, RCX/RDX are saved and restored around
///                 the sequence if they are live into the block, and all
///                 emitted instructions are flagged as FrameSetup.
959void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
961 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
962 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
963 assert(STI.is64Bit() && "different expansion needed for 32 bit");
964 assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
965 const TargetInstrInfo &TII = *STI.getInstrInfo();
966 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
967
968 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
970 "Inline stack probe loop will clobber live EFLAGS.");
971
972 // RAX contains the number of bytes of desired stack adjustment.
973 // The handling here assumes this value has already been updated so as to
974 // maintain stack alignment.
975 //
976 // We need to exit with RSP modified by this amount and execute suitable
977 // page touches to notify the OS that we're growing the stack responsibly.
978 // All stack probing must be done without modifying RSP.
979 //
980 // MBB:
981 // SizeReg = RAX;
982 // ZeroReg = 0
983 // CopyReg = RSP
984 // Flags, TestReg = CopyReg - SizeReg
985 // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
986 // LimitReg = gs magic thread env access
987 // if FinalReg >= LimitReg goto ContinueMBB
988 // RoundMBB:
989 // RoundedReg = page address of FinalReg
990 // LoopMBB:
991 // JoinReg = PHI(LimitReg,ProbeReg)
992 // ProbeReg = JoinReg - PageSize
993 // [ProbeReg] = 0
994 // if (ProbeReg > RoundedReg) goto LoopMBB
995 // ContinueMBB:
996 // RSP = RSP - RAX
997 // [rest of original MBB]
998
999 // Set up the new basic blocks
1000 MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1001 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1002 MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1003
// Insert the new blocks directly after MBB, in the order Round, Loop,
// Continue.
1004 MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
1005 MF.insert(MBBIter, RoundMBB);
1006 MF.insert(MBBIter, LoopMBB);
1007 MF.insert(MBBIter, ContinueMBB);
1008
1009 // Split MBB and move the tail portion down to ContinueMBB.
// BeforeMBBI remembers the last instruction that stays in MBB ahead of the
// expansion; it is used at the end to find the newly added instructions.
1010 MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
1011 ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
1012 ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1013
1014 // Some useful constants
1015 const int64_t ThreadEnvironmentStackLimit = 0x10;
1016 const int64_t PageSize = 0x1000;
1017 const int64_t PageMask = ~(PageSize - 1);
1018
1019 // Registers we need. For the normal case we use virtual
1020 // registers. For the prolog expansion we use RAX, RCX and RDX.
// In the prolog form several logical values share one physical register:
// RDX carries Copy/Test/Final/Rounded and RCX carries Zero/Limit/Join/Probe.
1021 MachineRegisterInfo &MRI = MF.getRegInfo();
1022 const TargetRegisterClass *RegClass = &X86::GR64RegClass;
1023 const Register
1024 SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
1025 ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1026 CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1027 TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1028 FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1029 RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1030 LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1031 JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1032 ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);
1033
1034 // SP-relative offsets where we can save RCX and RDX.
// A value of zero means "not spilled"; the restore code below tests for that.
1035 int64_t RCXShadowSlot = 0;
1036 int64_t RDXShadowSlot = 0;
1037
1038 // If inlining in the prolog, save RCX and RDX.
1039 if (InProlog) {
1040 // Compute the offsets. We need to account for things already
1041 // pushed onto the stack at this point: return address, frame
1042 // pointer (if used), and callee saves.
1043 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1044 const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
1045 const bool HasFP = hasFP(MF);
1046
1047 // Check if we need to spill RCX and/or RDX.
1048 // Here we assume that no earlier prologue instruction changes RCX and/or
1049 // RDX, so checking the block live-ins is enough.
1050 const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
1051 const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
1052 int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
1053 // Assign the initial slot to both registers, then change RDX's slot if both
1054 // need to be spilled.
1055 if (IsRCXLiveIn)
1056 RCXShadowSlot = InitSlot;
1057 if (IsRDXLiveIn)
1058 RDXShadowSlot = InitSlot;
1059 if (IsRDXLiveIn && IsRCXLiveIn)
1060 RDXShadowSlot += 8;
1061 // Emit the saves if needed.
1062 if (IsRCXLiveIn)
1063 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1064 RCXShadowSlot)
1065 .addReg(X86::RCX);
1066 if (IsRDXLiveIn)
1067 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1068 RDXShadowSlot)
1069 .addReg(X86::RDX);
1070 } else {
1071 // Not in the prolog. Copy RAX to a virtual reg.
1072 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
1073 }
1074
1075 // Add code to MBB to check for overflow and set the new target stack pointer
1076 // to zero if so.
1077 BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
1078 .addReg(ZeroReg, RegState::Undef)
1079 .addReg(ZeroReg, RegState::Undef);
1080 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
1081 BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
1082 .addReg(CopyReg)
1083 .addReg(SizeReg);
1084 BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
1085 .addReg(TestReg)
1086 .addReg(ZeroReg)
1088
1089 // FinalReg now holds final stack pointer value, or zero if
1090 // allocation would overflow. Compare against the current stack
1091 // limit from the thread environment block. Note this limit is the
1092 // lowest touched page on the stack, not the point at which the OS
1093 // will cause an overflow exception, so this is just an optimization
1094 // to avoid unnecessarily touching pages that are below the current
1095 // SP but already committed to the stack by the OS.
1096 BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
1097 .addReg(0)
1098 .addImm(1)
1099 .addReg(0)
1100 .addImm(ThreadEnvironmentStackLimit)
1101 .addReg(X86::GS);
1102 BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
1103 // Jump if the desired stack pointer is at or above the stack limit.
1104 BuildMI(&MBB, DL, TII.get(X86::JCC_1))
1105 .addMBB(ContinueMBB)
1107
1108 // Add code to roundMBB to round the final stack pointer to a page boundary.
1109 if (InProlog)
1110 RoundMBB->addLiveIn(FinalReg);
1111 BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
1112 .addReg(FinalReg)
1113 .addImm(PageMask);
1114 BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
1115
1116 // LimitReg now holds the current stack limit, RoundedReg page-rounded
1117 // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
1118 // and probe until we reach RoundedReg.
// Only the virtual-register form needs a PHI to merge LimitReg (from
// RoundMBB) with ProbeReg (from the loop back-edge); in the prolog form
// Limit/Join/Probe are all RCX.
1119 if (!InProlog) {
1120 BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
1121 .addReg(LimitReg)
1122 .addMBB(RoundMBB)
1123 .addReg(ProbeReg)
1124 .addMBB(LoopMBB);
1125 }
1126
1127 if (InProlog)
1128 LoopMBB->addLiveIn(JoinReg);
1129 addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
1130 false, -PageSize);
1131
1132 // Probe by storing a byte onto the stack.
1133 BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
1134 .addReg(ProbeReg)
1135 .addImm(1)
1136 .addReg(0)
1137 .addImm(0)
1138 .addReg(0)
1139 .addImm(0);
1140
1141 if (InProlog)
1142 LoopMBB->addLiveIn(RoundedReg);
1143 BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
1144 .addReg(RoundedReg)
1145 .addReg(ProbeReg);
1146 BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
1147 .addMBB(LoopMBB)
1149
// Everything inserted before ContinueMBBI lands ahead of the instructions
// that were moved over from the original block.
1150 MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
1151
1152 // If in prolog, restore RDX and RCX.
1153 if (InProlog) {
1154 if (RCXShadowSlot) // It means we spilled RCX in the prologue.
1155 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1156 TII.get(X86::MOV64rm), X86::RCX),
1157 X86::RSP, false, RCXShadowSlot);
1158 if (RDXShadowSlot) // It means we spilled RDX in the prologue.
1159 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1160 TII.get(X86::MOV64rm), X86::RDX),
1161 X86::RSP, false, RDXShadowSlot);
1162 }
1163
1164 // Now that the probing is done, add code to continueMBB to update
1165 // the stack pointer for real.
1166 BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
1167 .addReg(X86::RSP)
1168 .addReg(SizeReg);
1169
1170 // Add the control flow edges we need.
1171 MBB.addSuccessor(ContinueMBB);
1172 MBB.addSuccessor(RoundMBB);
1173 RoundMBB->addSuccessor(LoopMBB);
1174 LoopMBB->addSuccessor(ContinueMBB);
1175 LoopMBB->addSuccessor(LoopMBB);
1176
// The prolog form uses physical registers, so ContinueMBB's live-in list is
// computed here.
1177 if (InProlog) {
1178 LivePhysRegs LiveRegs;
1179 computeAndAddLiveIns(LiveRegs, *ContinueMBB);
1180 }
1181
1182 // Mark all the instructions added to the prolog as frame setup.
1183 if (InProlog) {
1184 for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
1185 BeforeMBBI->setFlag(MachineInstr::FrameSetup);
1186 }
1187 for (MachineInstr &MI : *RoundMBB) {
1189 }
1190 for (MachineInstr &MI : *LoopMBB) {
1192 }
1193 for (MachineInstr &MI :
1194 llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
1196 }
1197 }
1198}
1199
/// Emit a call to the target's stack probe function before \p MBBI.
///
/// The probe symbol name comes from the target lowering. With the 64-bit
/// large code model the call goes through R11; otherwise a direct call opcode
/// is used. On Win64 and on non-Windows targets the probe does not adjust the
/// stack pointer, so an explicit SP adjustment by AX is emitted after the
/// call.
///
/// \param MBBI     Insertion point for the emitted instructions.
/// \param DL       Debug location attached to the emitted instructions.
/// \param InProlog When true, every inserted instruction is flagged as
///                 FrameSetup.
/// \param InstrNum Optional debug instruction number of the allocation; when
///                 present, a debug value substitution is registered that
///                 points at the instruction modifying SP.
1200void X86FrameLowering::emitStackProbeCall(
1202 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
1203 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
1204 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
1205
1206 // FIXME: Add indirect thunk support and remove this.
1207 if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
1208 report_fatal_error("Emitting stack probe calls on 64-bit with the large "
1209 "code model and indirect thunks not yet implemented.");
1210
1211 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
1213 "Stack probe calls will clobber live EFLAGS.");
1214
// Pick the call opcode: register-indirect for the 64-bit large code model,
// PC-relative otherwise.
1215 unsigned CallOp;
1216 if (Is64Bit)
1217 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
1218 else
1219 CallOp = X86::CALLpcrel32;
1220
1221 StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);
1222
1223 MachineInstrBuilder CI;
// Remember the instruction just before the expansion so the inserted range
// can be walked at the end.
1224 MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
1225
1226 // All current stack probes take AX and SP as input, clobber flags, and
1227 // preserve all registers. x86_64 probes leave RSP unmodified.
1229 // For the large code model, we have to call through a register. Use R11,
1230 // as it is scratch in all supported calling conventions.
1231 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
1233 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
1234 } else {
1235 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
1237 }
1238
1239 unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
1240 unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
1246
// ModInst is the instruction that modifies SP: the call itself by default,
// or the explicit SP adjustment emitted below.
1247 MachineInstr *ModInst = CI;
1248 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1249 // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
1250 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
1251 // themselves. They also do not clobber %rax so we can reuse it when
1252 // adjusting %rsp.
1253 // All other platforms do not specify a particular ABI for the stack probe
1254 // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
1255 ModInst =
1257 .addReg(SP)
1258 .addReg(AX);
1259 }
1260
1261 // DebugInfo variable locations -- if there's an instruction number for the
1262 // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
1263 // modifies SP.
1264 if (InstrNum) {
1265 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1266 // Label destination operand of the subtract.
1267 MF.makeDebugValueSubstitution(*InstrNum,
1268 {ModInst->getDebugInstrNum(), 0});
1269 } else {
1270 // Label the call. The operand number is the penultimate operand, zero
1271 // based.
1272 unsigned SPDefOperand = ModInst->getNumOperands() - 2;
1274 *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
1275 }
1276 }
1277
1278 if (InProlog) {
1279 // Apply the frame setup flag to all inserted instrs.
1280 for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
1281 ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
1282 }
1283}
1284
/// Compute the frame-register offset to record for the Win64 UWOP_SET_FPREG
/// unwind opcode, given the stack pointer adjustment \p SPAdjust.
///
/// The result is \p SPAdjust capped at 128 and rounded down to a multiple of
/// 16, so it is always one of 0, 16, ..., 128.
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // The Win64 ABI would allow up to 240 here; 128 works equally well and
  // might require smaller successive adjustments.
  const uint64_t OffsetCap = 128;
  // UWOP_SET_FPREG requires the offset to be 16-byte aligned.
  const uint64_t AlignMask = ~static_cast<uint64_t>(15);

  const uint64_t Capped = SPAdjust < OffsetCap ? SPAdjust : OffsetCap;
  return Capped & AlignMask;
}
1293
/// Return the stack alignment, in bytes, that the prologue should establish
/// for \p MF.
///
/// The starting point is the frame's own maximum alignment. It is only raised
/// when the function carries the "stackrealign" attribute or when the block
/// near the end of this function applies.
1294// If we're forcing a stack realignment we can't rely on just the frame
1295// info, we need to know the ABI stack alignment as well in case we
1296// have a call out. Otherwise just make sure we have some alignment - we'll
1297// go with the minimum SlotSize.
1298uint64_t
1299X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
1300 const MachineFrameInfo &MFI = MF.getFrameInfo();
1301 Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
1302 Align StackAlign = getStackAlign();
1303 bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
1304 if (HasRealign) {
// With calls, use the larger of the ABI stack alignment and the frame's
// alignment; without calls, only make sure we reach SlotSize.
1305 if (MFI.hasCalls())
1306 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
1307 else if (MaxAlign < SlotSize)
1308 MaxAlign = Align(SlotSize);
1309 }
1310
// NOTE(review): the condition guarding the block below is not visible here;
// confirm which case it selects. Inside it, a realigned function gets at
// least 16 bytes and any other function gets exactly 16 bytes.
1312 if (HasRealign)
1313 MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
1314 else
1315 MaxAlign = Align(16);
1316 }
1317 return MaxAlign.value();
1318}
1319
/// Align \p Reg down to a multiple of \p MaxAlign by AND-ing it with
/// -MaxAlign.
///
/// In the common case this is a single AND inserted before the insertion
/// point, with its implicit EFLAGS def marked dead. When \p Reg is the stack
/// pointer, inline stack probing is enabled, and \p MaxAlign is at least the
/// stack probe size, the function instead builds four new blocks (entry,
/// head, body, foot) that compute the aligned value in a scratch register and
/// step the stack pointer down one probe size at a time, touching memory as
/// it goes.
///
/// \param MBB      Block to insert into.
/// \param DL       Debug location attached to the emitted instructions.
/// \param Reg      Register to align.
/// \param MaxAlign Alignment in bytes.
1320void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
1322 const DebugLoc &DL, Register Reg,
1323 uint64_t MaxAlign) const {
// Val is the AND mask: the unsigned negation of MaxAlign.
1324 uint64_t Val = -MaxAlign;
1325 unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
1326
1327 MachineFunction &MF = *MBB.getParent();
1328 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
1329 const X86TargetLowering &TLI = *STI.getTargetLowering();
1330 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
1331 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
1332
1333 // We want to make sure that (in worst case) less than StackProbeSize bytes
1334 // are not probed after the AND. This assumption is used in
1335 // emitStackProbeInlineGeneric.
1336 if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
1337 {
1338 NumFrameLoopProbe++;
1339 MachineBasicBlock *entryMBB =
1341 MachineBasicBlock *headMBB =
1343 MachineBasicBlock *bodyMBB =
1345 MachineBasicBlock *footMBB =
1347
1349 MF.insert(MBBIter, entryMBB);
1350 MF.insert(MBBIter, headMBB);
1351 MF.insert(MBBIter, bodyMBB);
1352 MF.insert(MBBIter, footMBB);
1353 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
// Scratch register that holds the aligned (final) stack pointer value
// while the loop runs.
1354 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
1355 : Is64Bit ? X86::R11D
1356 : X86::EAX;
1357
1358 // Setup entry block
1359 {
1360
// Move everything that preceded the insertion point into entryMBB.
1361 entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
1362 BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
1365 MachineInstr *MI =
1366 BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
1367 .addReg(FinalStackProbed)
1368 .addImm(Val)
1370
1371 // The EFLAGS implicit def is dead.
1372 MI->getOperand(3).setIsDead();
1373
1374 BuildMI(entryMBB, DL,
1375 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1376 .addReg(FinalStackProbed)
1379 BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
1380 .addMBB(&MBB)
1383 entryMBB->addSuccessor(headMBB);
1384 entryMBB->addSuccessor(&MBB);
1385 }
1386
1387 // Loop entry block
1388
1389 {
1390 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
1391 BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
1393 .addImm(StackProbeSize)
1395
1396 BuildMI(headMBB, DL,
1397 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1399 .addReg(FinalStackProbed)
1401
1402 // jump to the footer if StackPtr < FinalStackProbed
1403 BuildMI(headMBB, DL, TII.get(X86::JCC_1))
1404 .addMBB(footMBB)
1407
1408 headMBB->addSuccessor(bodyMBB);
1409 headMBB->addSuccessor(footMBB);
1410 }
1411
1412 // setup loop body
1413 {
// Touch the current stack location, then step down by one probe size.
1414 addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
1416 StackPtr, false, 0)
1417 .addImm(0)
1419
1420 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
1421 BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
1423 .addImm(StackProbeSize)
1425
1426 // cmp with stack pointer bound
1427 BuildMI(bodyMBB, DL,
1428 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1429 .addReg(FinalStackProbed)
1432
1433 // jump back while FinalStackProbed < StackPtr
1434 BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
1435 .addMBB(bodyMBB)
1438 bodyMBB->addSuccessor(bodyMBB);
1439 bodyMBB->addSuccessor(footMBB);
1440 }
1441
1442 // setup loop footer
1443 {
// Install the aligned value as the stack pointer and touch that location.
1444 BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
1445 .addReg(FinalStackProbed)
1447 addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
1449 StackPtr, false, 0)
1450 .addImm(0)
1452 footMBB->addSuccessor(&MBB);
1453 }
1454
1455 fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
1456 }
1457 } else {
// Simple case: a single AND on Reg at the insertion point.
1458 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
1459 .addReg(Reg)
1460 .addImm(Val)
1462
1463 // The EFLAGS implicit def is dead.
1464 MI->getOperand(3).setIsDead();
1465 }
1466}
1467
1469 // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
1470 // clobbered by any interrupt handler.
1471 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1472 "MF used frame lowering for wrong subtarget");
1473 const Function &Fn = MF.getFunction();
1474 const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
// The red zone is available only for 64-bit code, with a calling convention
// that is not Win64, and when the function does not carry the NoRedZone
// attribute.
1475 return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
1476}
1477
1478/// Return true if we need to use the restricted Windows x64 prologue and
1479/// epilogue code patterns that can be described with WinCFI (.seh_*
1480/// directives).
1481bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
1482 return MF.getTarget().getMCAsmInfo().usesWindowsCFI();
1483}
1484
1485bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
1486 return !isWin64Prologue(MF) && MF.needsFrameMoves();
1487}
1488
1489/// Return true if an opcode is part of the REP group of instructions
1490static bool isOpcodeRep(unsigned Opcode) {
1491 switch (Opcode) {
1492 case X86::REPNE_PREFIX:
1493 case X86::REP_MOVSB_32:
1494 case X86::REP_MOVSB_64:
1495 case X86::REP_MOVSD_32:
1496 case X86::REP_MOVSD_64:
1497 case X86::REP_MOVSQ_32:
1498 case X86::REP_MOVSQ_64:
1499 case X86::REP_MOVSW_32:
1500 case X86::REP_MOVSW_64:
1501 case X86::REP_PREFIX:
1502 case X86::REP_STOSB_32:
1503 case X86::REP_STOSB_64:
1504 case X86::REP_STOSD_32:
1505 case X86::REP_STOSD_64:
1506 case X86::REP_STOSQ_32:
1507 case X86::REP_STOSQ_64:
1508 case X86::REP_STOSW_32:
1509 case X86::REP_STOSW_64:
1510 return true;
1511 default:
1512 break;
1513 }
1514 return false;
1515}
1516
1517/// emitPrologue - Push callee-saved registers onto the stack, which
1518/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
1519/// space for local variables. Also emit labels used by the exception handler to
1520/// generate the exception handling frames.
1521
1522/*
1523 Here's a gist of what gets emitted:
1524
1525 ; Establish frame pointer, if needed
1526 [if needs FP]
1527 push %rbp
1528 .cfi_def_cfa_offset 16
1529 .cfi_offset %rbp, -16
1530 .seh_pushreg %rbp
1531 mov %rsp, %rbp
1532 .cfi_def_cfa_register %rbp
1533
1534 ; Spill general-purpose registers
1535 [for all callee-saved GPRs]
1536 pushq %<reg>
1537 [if not needs FP]
1538 .cfi_def_cfa_offset (offset from RETADDR)
1539 .seh_pushreg %<reg>
1540
1541 ; If the required stack alignment > default stack alignment
1542 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
1543 ; of unknown size in the stack frame.
1544 [if stack needs re-alignment]
1545 and $MASK, %rsp
1546
1547 ; Allocate space for locals
1548 [if target is Windows and allocated space > 4096 bytes]
1549 ; Windows needs special care for allocations larger
1550 ; than one page.
1551 mov $NNN, %rax
1552 call ___chkstk_ms/___chkstk
1553 sub %rax, %rsp
1554 [else]
1555 sub $NNN, %rsp
1556
1557 [if needs FP]
1558 .seh_stackalloc (size of XMM spill slots)
1559 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
1560 [else]
1561 .seh_stackalloc NNN
1562
1563 ; Spill XMMs
1564 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
1565 ; they may get spilled on any platform, if the current function
1566 ; calls @llvm.eh.unwind.init
1567 [if needs FP]
1568 [for all callee-saved XMM registers]
1569 movaps %<xmm reg>, -MMM(%rbp)
1570 [for all callee-saved XMM registers]
1571 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
1572 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
1573 [else]
1574 [for all callee-saved XMM registers]
1575 movaps %<xmm reg>, KKK(%rsp)
1576 [for all callee-saved XMM registers]
1577 .seh_savexmm %<xmm reg>, KKK
1578
1579 .seh_endprologue
1580
1581 [if needs base pointer]
1582 mov %rsp, %rbx
1583 [if needs to restore base pointer]
1584 mov %rsp, -MMM(%rbp)
1585
1586 ; Emit CFI info
1587 [if needs FP]
1588 [for all callee-saved registers]
1589 .cfi_offset %<reg>, (offset from %rbp)
1590 [else]
1591 .cfi_def_cfa_offset (offset from RETADDR)
1592 [for all callee-saved registers]
1593 .cfi_offset %<reg>, (offset from %rsp)
1594
1595 Notes:
1596 - .seh directives are emitted only for Windows 64 ABI
1597 - .cv_fpo directives are emitted on win32 when emitting CodeView
1598 - .cfi directives are emitted for all other ABIs
1599 - for 32-bit code, substitute %e?? registers for %r??
1600*/
1601
1603 MachineBasicBlock &MBB) const {
1604 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1605 "MF used frame lowering for wrong subtarget");
1607 MachineFrameInfo &MFI = MF.getFrameInfo();
1608 const Function &Fn = MF.getFunction();
1610 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1611 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1612 bool IsFunclet = MBB.isEHFuncletEntry();
1614 if (Fn.hasPersonalityFn())
1615 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1616 bool FnHasClrFunclet =
1617 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1618 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1619 bool HasFP = hasFP(MF);
1620 bool IsWin64Prologue = isWin64Prologue(MF);
1621 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1622 // FIXME: Emit FPO data for EH funclets.
1623 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1625 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1626 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1627 bool IsWin64UnwindV3 =
1628 NeedsWin64CFI &&
1630 Register FramePtr = TRI->getFrameRegister(MF);
1631 const Register MachineFramePtr =
1632 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
1633 : FramePtr;
1634 Register BasePtr = TRI->getBaseRegister();
1635 bool HasWinCFI = false;
1636
1637 // Helpers to emit Windows x64 unwind SEH pseudos with the correct placement.
1638 // V1/V2: pseudo goes after the real instruction.
1639 // V3: pseudo goes before the real instruction.
1640 // Usage:
1641 // EmitSEHBefore([&]{ BuildMI(...SEH_PushReg...); });
1642 // BuildMI(... real instruction ...);
1643 // EmitSEHAfter([&]{ BuildMI(...SEH_PushReg...); });
1644 auto EmitSEHBefore = [&](auto EmitFn) {
1645 if (NeedsWinCFI && IsWin64UnwindV3) {
1646 HasWinCFI = true;
1647 EmitFn();
1648 }
1649 };
1650 auto EmitSEHAfter = [&](auto EmitFn) {
1651 if (NeedsWinCFI && !IsWin64UnwindV3) {
1652 HasWinCFI = true;
1653 EmitFn();
1654 }
1655 };
1656
1657 // Debug location must be unknown since the first debug location is used
1658 // to determine the end of the prologue.
1659 DebugLoc DL;
1660 Register ArgBaseReg;
1661
1662 // Emit extra prolog for argument stack slot reference.
1663 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1664 // MI is the lea instruction that was created in X86ArgumentStackSlotPass.
1665 // Create extra prolog for stack realignment.
1666 ArgBaseReg = MI->getOperand(0).getReg();
1667 // leal 4(%esp), %basereg
1668 // .cfi_def_cfa %basereg, 0
1669 // andl $-128, %esp
1670 // pushl -4(%basereg)
1671 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1672 ArgBaseReg)
1674 .addImm(1)
1675 .addUse(X86::NoRegister)
1677 .addUse(X86::NoRegister)
1679 if (NeedsDwarfCFI) {
1680 // .cfi_def_cfa %basereg, 0
1681 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1682 BuildCFI(MBB, MBBI, DL,
1683 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1685 }
1686 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1687 int64_t Offset = -(int64_t)SlotSize;
1688 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1689 .addReg(ArgBaseReg)
1690 .addImm(1)
1691 .addReg(X86::NoRegister)
1692 .addImm(Offset)
1693 .addReg(X86::NoRegister)
1695 }
1696
1697 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1698 // tail call.
1699 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1700 if (TailCallArgReserveSize && IsWin64Prologue)
1701 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1702
1703 const bool EmitStackProbeCall =
1704 STI.getTargetLowering()->hasStackProbeSymbol(MF);
1705 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1706
1707 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1710 if (STI.swiftAsyncContextIsDynamicallySet()) {
1711 // The special symbol below is absolute and has a *value* suitable to be
1712 // combined with the frame pointer directly.
1713 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1714 .addUse(MachineFramePtr)
1715 .addUse(X86::RIP)
1716 .addImm(1)
1717 .addUse(X86::NoRegister)
1718 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1720 .addUse(X86::NoRegister);
1721 break;
1722 }
1723 [[fallthrough]];
1724
1726 assert(
1727 !IsWin64Prologue &&
1728 "win64 prologue does not set the bit 60 in the saved frame pointer");
1729 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1730 .addUse(MachineFramePtr)
1731 .addImm(60)
1733 break;
1734
1736 break;
1737 }
1738 }
1739
1740 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1741 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1742 // stack alignment.
1744 Fn.arg_size() == 2) {
1745 StackSize += 8;
1746 MFI.setStackSize(StackSize);
1747
1748 // Update the stack pointer by pushing a register. This is the instruction
1749 // that would end up being emitted by a call to `emitSPUpdate`.
1750 // Hard-coding the update to a push avoids emitting a second
1751 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1752 // probing isn't needed anyways for an 8-byte update.
1753 // Pushing a register leaves us in a similar situation to a regular
1754 // function call where we know that the address at (rsp-8) is writeable.
1755 // That way we avoid any off-by-ones with stack probing for additional
1756 // stack pointer updates later on.
1757 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1758 .addReg(X86::RAX, RegState::Undef)
1760 }
1761
1762 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1763 // function, and use up to 128 bytes of stack space, don't have a frame
1764 // pointer, calls, or dynamic alloca then we do not need to adjust the
1765 // stack pointer (we fit in the Red Zone). We also check that we don't
1766 // push and pop from the stack.
1767 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1768 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1769 !MFI.adjustsStack() && // No calls.
1770 !EmitStackProbeCall && // No stack probes.
1771 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1772 !MF.shouldSplitStack()) { // Regular stack
1773 uint64_t MinSize =
1775 if (HasFP)
1776 MinSize += SlotSize;
1777 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1778 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1779 MFI.setStackSize(StackSize);
1780 }
1781
1782 // Insert stack pointer adjustment for later moving of return addr. Only
1783 // applies to tail call optimized functions where the callee argument stack
1784 // size is bigger than the callers.
1785 if (TailCallArgReserveSize != 0) {
1786 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1787 /*InEpilogue=*/false)
1788 .setMIFlag(MachineInstr::FrameSetup);
1789 }
1790
1791 // Mapping for machine moves:
1792 //
1793 // DST: VirtualFP AND
1794 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1795 // ELSE => DW_CFA_def_cfa
1796 //
1797 // SRC: VirtualFP AND
1798 // DST: Register => DW_CFA_def_cfa_register
1799 //
1800 // ELSE
1801 // OFFSET < 0 => DW_CFA_offset_extended_sf
1802 // REG < 64 => DW_CFA_offset + Reg
1803 // ELSE => DW_CFA_offset_extended
1804
1805 uint64_t NumBytes = 0;
1806 int stackGrowth = -SlotSize;
1807
1808 // Find the funclet establisher parameter
1809 MCRegister Establisher;
1810 if (IsClrFunclet)
1811 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1812 else if (IsFunclet)
1813 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1814
1815 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1816 // Immediately spill establisher into the home slot.
1817 // The runtime cares about this.
1818 // MOV64mr %rdx, 16(%rsp)
1819 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1820 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1821 .addReg(Establisher)
1823 MBB.addLiveIn(Establisher);
1824 }
1825
1826 if (HasFP) {
1827 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1828
1829 // Calculate required stack adjustment.
1830 uint64_t FrameSize = StackSize - SlotSize;
1831 NumBytes =
1832 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1833
1834 // Callee-saved registers are pushed on stack before the stack is realigned.
1835 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1836 NumBytes = alignTo(NumBytes, MaxAlign);
1837
1838 // Save EBP/RBP into the appropriate stack slot.
1839 auto EmitSEHPushFramePtr = [&]() {
1840 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1843 };
1844 EmitSEHBefore(EmitSEHPushFramePtr);
1845 BuildMI(MBB, MBBI, DL,
1847 .addReg(MachineFramePtr, RegState::Kill)
1849 EmitSEHAfter(EmitSEHPushFramePtr);
1850
1851 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1852 // Mark the place where EBP/RBP was saved.
1853 // Define the current CFA rule to use the provided offset.
1854 assert(StackSize);
1855 BuildCFI(MBB, MBBI, DL,
1857 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1859
1860 // Change the rule for the FramePtr to be an "offset" rule.
1861 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1862 BuildCFI(MBB, MBBI, DL,
1863 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1864 2 * stackGrowth -
1865 (int)TailCallArgReserveSize),
1867 }
1868
1869 if (!IsFunclet) {
1870 if (X86FI->hasSwiftAsyncContext()) {
1871 assert(!IsWin64Prologue &&
1872 "win64 prologue does not store async context right below rbp");
1873 const auto &Attrs = MF.getFunction().getAttributes();
1874
1875 // Before we update the live frame pointer we have to ensure there's a
1876 // valid (or null) asynchronous context in its slot just before FP in
1877 // the frame record, so store it now.
1878 auto EmitSEHPushR14 = [&]() {
1879 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1880 .addImm(X86::R14)
1882 };
1883 EmitSEHBefore(EmitSEHPushR14);
1884 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1885 // We have an initial context in r14, store it just before the frame
1886 // pointer.
1887 MBB.addLiveIn(X86::R14);
1888 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1889 .addReg(X86::R14)
1891 } else {
1892 // No initial context, store null so that there's no pointer that
1893 // could be misused.
1894 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1895 .addImm(0)
1897 }
1898 EmitSEHAfter(EmitSEHPushR14);
1899
1900 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1901 .addUse(X86::RSP)
1902 .addImm(1)
1903 .addUse(X86::NoRegister)
1904 .addImm(8)
1905 .addUse(X86::NoRegister)
1907 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1908 .addUse(X86::RSP)
1909 .addImm(8)
1911 }
1912
1913 if (!IsWin64Prologue && !IsFunclet) {
1914 // Update EBP with the new base value.
1915 if (!X86FI->hasSwiftAsyncContext())
1916 BuildMI(MBB, MBBI, DL,
1917 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1918 FramePtr)
1921
1922 if (NeedsDwarfCFI) {
1923 if (ArgBaseReg.isValid()) {
1924 SmallString<64> CfaExpr;
1925 CfaExpr.push_back(dwarf::DW_CFA_expression);
1926 uint8_t buffer[16];
1927 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1928 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1929 CfaExpr.push_back(2);
1930 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1931 CfaExpr.push_back(0);
1932 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1933 BuildCFI(MBB, MBBI, DL,
1934 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1936 } else {
1937 // Mark effective beginning of when frame pointer becomes valid.
1938 // Define the current CFA to use the EBP/RBP register.
1939 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1940 BuildCFI(
1941 MBB, MBBI, DL,
1942 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1944 }
1945 }
1946
1947 if (NeedsWinFPO) {
1948 // .cv_fpo_setframe $FramePtr
1949 // NeedsWinFPO is Win32 only, so we're never using Unwind v3, hence it
1950 // is always inserted afterwards.
1951 assert(!IsWin64UnwindV3);
1952 HasWinCFI = true;
1953 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1955 .addImm(0)
1957 }
1958 }
1959 }
1960 } else {
1961 assert(!IsFunclet && "funclets without FPs not yet implemented");
1962 NumBytes =
1963 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1964 }
1965
1966 // Update the offset adjustment, which is mainly used by codeview to translate
1967 // from ESP to VFRAME relative local variable offsets.
1968 if (!IsFunclet) {
1969 if (HasFP && TRI->hasStackRealignment(MF))
1970 MFI.setOffsetAdjustment(-NumBytes);
1971 else
1972 MFI.setOffsetAdjustment(-StackSize);
1973 }
1974
1975 // For EH funclets, only allocate enough space for outgoing calls. Save the
1976 // NumBytes value that we would've used for the parent frame.
1977 unsigned ParentFrameNumBytes = NumBytes;
1978 if (IsFunclet)
1979 NumBytes = getWinEHFuncletFrameSize(MF);
1980
1981 // Skip the callee-saved push instructions.
1982 bool PushedRegs = false;
1983 int StackOffset = 2 * stackGrowth;
1985 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1986 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1987 return false;
1988 unsigned Opc = MBBI->getOpcode();
1989 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1990 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1991 };
1992
1993 while (IsCSPush(MBBI)) {
1994 PushedRegs = true;
1995 Register Reg = MBBI->getOperand(0).getReg();
1996 LastCSPush = MBBI;
1997 unsigned Opc = LastCSPush->getOpcode();
1998 bool IsPush2 = Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1999
2000 // V3: emit SEH pseudo before the real instruction.
2001 EmitSEHBefore([&]() {
2002 if (IsPush2) {
2003 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Push2Regs))
2004 .addImm(Reg)
2005 .addImm(LastCSPush->getOperand(1).getReg())
2007 } else {
2008 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
2009 .addImm(Reg)
2011 }
2012 });
2013 ++MBBI;
2014
2015 if (!HasFP && NeedsDwarfCFI) {
2016 // Mark callee-saved push instruction.
2017 // Define the current CFA rule to use the provided offset.
2018 assert(StackSize);
2019 // Compared to push, push2 introduces more stack offset (one more
2020 // register).
2021 if (IsPush2)
2022 StackOffset += stackGrowth;
2023 BuildCFI(MBB, MBBI, DL,
2026 StackOffset += stackGrowth;
2027 }
2028
2029 // V1/V2: emit SEH pseudo after the real instruction.
2030 EmitSEHAfter([&]() {
2031 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
2032 .addImm(Reg)
2034 if (IsPush2)
2035 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
2036 .addImm(LastCSPush->getOperand(1).getReg())
2038 });
2039 }
2040
2041 // Realign stack after we pushed callee-saved registers (so that we'll be
2042 // able to calculate their offsets from the frame pointer).
2043 // Don't do this for Win64, it needs to realign the stack after the prologue.
2044 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
2045 !ArgBaseReg.isValid()) {
2046 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2047 auto EmitSEHStackAlign = [&]() {
2048 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
2049 .addImm(MaxAlign)
2051 };
2052 EmitSEHBefore(EmitSEHStackAlign);
2053 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
2054 EmitSEHAfter(EmitSEHStackAlign);
2055 }
2056
2057 // If there is an SUB32ri of ESP immediately before this instruction, merge
2058 // the two. This can be the case when tail call elimination is enabled and
2059 // the callee has more arguments than the caller.
2060 NumBytes = mergeSPUpdates(
2061 MBB, MBBI, [NumBytes](int64_t Offset) { return NumBytes - Offset; },
2062 true);
2063
2064 // Adjust stack pointer: ESP -= numbytes.
2065
2066 // Windows and cygwin/mingw require a prologue helper routine when allocating
2067 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
2068 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
2069 // stack and adjust the stack pointer in one go. The 64-bit version of
2070 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
2071 // responsible for adjusting the stack pointer. Touching the stack at 4K
2072 // increments is necessary to ensure that the guard pages used by the OS
2073 // virtual memory manager are allocated in correct sequence.
2074 uint64_t AlignedNumBytes = NumBytes;
2075 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
2076 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
2077
2078 auto EmitSEHStackAlloc = [&]() {
2079 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2080 .addImm(NumBytes)
2082 };
2083 if (NumBytes)
2084 EmitSEHBefore(EmitSEHStackAlloc);
2085
2086 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
2087 assert(!X86FI->getUsesRedZone() &&
2088 "The Red Zone is not accounted for in stack probes");
2089
2090 // Check whether EAX is livein for this block.
2091 bool isEAXAlive = isEAXLiveIn(MBB);
2092
2093 if (isEAXAlive) {
2094 if (Is64Bit) {
2095 // Save RAX
2096 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
2097 .addReg(X86::RAX, RegState::Kill)
2099 } else {
2100 // Save EAX
2101 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
2102 .addReg(X86::EAX, RegState::Kill)
2104 }
2105 }
2106
2107 if (Is64Bit) {
2108 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
2109 // Function prologue is responsible for adjusting the stack pointer.
2110 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
2112 X86::RAX)
2113 .addImm(Alloc)
2115 } else {
2116 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
2117 // We'll also use 4 already allocated bytes for EAX.
2118 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2119 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
2121 }
2122
2123 // Call __chkstk, __chkstk_ms, or __alloca.
2124 emitStackProbe(MF, MBB, MBBI, DL, true);
2125
2126 if (isEAXAlive) {
2127 // Restore RAX/EAX
2129 if (Is64Bit)
2130 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2131 StackPtr, false, NumBytes - 8);
2132 else
2133 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2134 StackPtr, false, NumBytes - 4);
2135 MI->setFlag(MachineInstr::FrameSetup);
2136 MBB.insert(MBBI, MI);
2137 }
2138 } else if (NumBytes) {
2139 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2140 }
2141
2142 if (NumBytes)
2143 EmitSEHAfter(EmitSEHStackAlloc);
2144
2145 int SEHFrameOffset = 0;
2146 Register SPOrEstablisher;
2147 if (IsFunclet) {
2148 if (IsClrFunclet) {
2149 // The establisher parameter passed to a CLR funclet is actually a pointer
2150 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2151 // to find the root function establisher frame by loading the PSPSym from
2152 // the intermediate frame.
2153 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2154 MachinePointerInfo NoInfo;
2155 MBB.addLiveIn(Establisher);
2156 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2157 Establisher, false, PSPSlotOffset)
2160 ;
2161 // Save the root establisher back into the current funclet's (mostly
2162 // empty) frame, in case a sub-funclet or the GC needs it.
2163 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2164 false, PSPSlotOffset)
2165 .addReg(Establisher)
2167 NoInfo,
2170 }
2171 SPOrEstablisher = Establisher;
2172 } else {
2173 SPOrEstablisher = StackPtr;
2174 }
2175
2176 if (IsWin64Prologue && HasFP) {
2177 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2178 // this calculation on the incoming establisher, which holds the value of
2179 // RSP from the parent frame at the end of the prologue.
2180 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2181
2182 // If this is not a funclet, emit the CFI describing our frame pointer.
2183 if (NeedsWinCFI && !IsFunclet) {
2184 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2185 HasWinCFI = true;
2186 if (isAsynchronousEHPersonality(Personality) || MF.hasEHFunclets()) {
2187 if (TRI->hasBasePointer(MF))
2190 else
2191 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2192 }
2193 }
2194
2195 auto EmitSEHSetFrame = [&]() {
2196 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2198 .addImm(SEHFrameOffset)
2200 };
2201
2202 if (!IsFunclet)
2203 EmitSEHBefore(EmitSEHSetFrame);
2204
2205 if (SEHFrameOffset)
2206 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2207 SPOrEstablisher, false, SEHFrameOffset);
2208 else
2209 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2210 .addReg(SPOrEstablisher);
2211
2212 if (!IsFunclet)
2213 EmitSEHAfter(EmitSEHSetFrame);
2214 } else if (IsFunclet && STI.is32Bit()) {
2215 // Reset EBP / ESI to something good for funclets.
2217 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2218 // into the registration node so that the runtime will restore it for us.
2219 if (!MBB.isCleanupFuncletEntry()) {
2220 assert(Personality == EHPersonality::MSVC_CXX);
2221 Register FrameReg;
2223 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2224 // ESP is the first field, so no extra displacement is needed.
2225 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2226 false, EHRegOffset)
2227 .addReg(X86::ESP);
2228 }
2229 }
2230
2231 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2232 const MachineInstr &FrameInstr = *MBBI;
2233
2234 if (NeedsWinCFI) {
2235 int FI;
2236 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2237 if (X86::FR64RegClass.contains(Reg)) {
2238 int Offset;
2239 Register IgnoredFrameReg;
2240 if (IsWin64Prologue && IsFunclet)
2241 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2242 else
2243 Offset =
2244 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2245 SEHFrameOffset;
2246
2247 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2248 auto EmitSEHSaveXMM = [&]() {
2249 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2250 .addImm(Reg)
2251 .addImm(Offset)
2253 };
2254 EmitSEHBefore(EmitSEHSaveXMM);
2255 ++MBBI;
2256 EmitSEHAfter(EmitSEHSaveXMM);
2257 continue;
2258 }
2259 }
2260 }
2261 ++MBBI;
2262 }
2263
2264 if (NeedsWinCFI && HasWinCFI) {
2265 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2267 }
2268
2269 if (FnHasClrFunclet && !IsFunclet) {
2270 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2271 // immediately after the prolog) into the PSPSlot so that funclets
2272 // and the GC can recover it.
2273 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2274 auto PSPInfo = MachinePointerInfo::getFixedStack(
2276 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2277 PSPSlotOffset)
2282 }
2283
2284 // Realign stack after we spilled callee-saved registers (so that we'll be
2285 // able to calculate their offsets from the frame pointer).
2286 // Win64 requires aligning the stack after the prologue.
2287 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2288 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2289 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2290 }
2291
2292 // We already dealt with stack realignment and funclets above.
2293 if (IsFunclet && STI.is32Bit())
2294 return;
2295
2296 // If we need a base pointer, set it up here. It's whatever the value
2297 // of the stack pointer is at this point. Any variable size objects
2298 // will be allocated after this, so we can still use the base pointer
2299 // to reference locals.
2300 if (TRI->hasBasePointer(MF)) {
2301 // Update the base pointer with the current stack pointer.
2302 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2303 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2304 .addReg(SPOrEstablisher)
2306 if (X86FI->getRestoreBasePointer()) {
2307 // Stash value of base pointer. Saving RSP instead of EBP shortens
2308 // dependence chain. Used by SjLj EH.
2309 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2310 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2312 .addReg(SPOrEstablisher)
2314 }
2315
2316 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2317 // Stash the value of the frame pointer relative to the base pointer for
2318 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2319 // it recovers the frame pointer from the base pointer rather than the
2320 // other way around.
2321 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2322 Register UsedReg;
2323 int Offset =
2324 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2325 .getFixed();
2326 assert(UsedReg == BasePtr);
2327 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2330 }
2331 }
2332 if (ArgBaseReg.isValid()) {
2333 // Save argument base pointer.
2334 auto *MI = X86FI->getStackPtrSaveMI();
2335 int FI = MI->getOperand(1).getIndex();
2336 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2337 // movl %basereg, offset(%ebp)
2338 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2339 .addReg(ArgBaseReg)
2341 }
2342
2343 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2344 // Mark end of stack pointer adjustment.
2345 if (!HasFP && NumBytes) {
2346 // Define the current CFA rule to use the provided offset.
2347 assert(StackSize);
2348 BuildCFI(
2349 MBB, MBBI, DL,
2350 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2352 }
2353
2354 // Emit DWARF info specifying the offsets of the callee-saved registers.
2356 }
2357
2358 // X86 Interrupt handling function cannot assume anything about the direction
2359 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2360 // in each prologue of interrupt handler function.
2361 //
2362 // Create "cld" instruction only in these cases:
2363 // 1. The interrupt handling function uses any of the "rep" instructions.
2364 // 2. Interrupt handling function calls another function.
2365 // 3. If there are any inline asm blocks, as we do not know what they do
2366 //
2367 // TODO: We should also emit cld if we detect the use of std, but as of now,
2368 // the compiler does not even emit that instruction or even define it, so in
2369 // practice, this would only happen with inline asm, which we cover anyway.
2371 bool NeedsCLD = false;
2372
2373 for (const MachineBasicBlock &B : MF) {
2374 for (const MachineInstr &MI : B) {
2375 if (MI.isCall()) {
2376 NeedsCLD = true;
2377 break;
2378 }
2379
2380 if (isOpcodeRep(MI.getOpcode())) {
2381 NeedsCLD = true;
2382 break;
2383 }
2384
2385 if (MI.isInlineAsm()) {
2386 // TODO: Parse asm for rep instructions or call sites?
2387 // For now, let's play it safe and emit a cld instruction
2388 // just in case.
2389 NeedsCLD = true;
2390 break;
2391 }
2392 }
2393 }
2394
2395 if (NeedsCLD) {
2396 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2398 }
2399 }
2400
2401 // At this point we know if the function has WinCFI or not.
2402 MF.setHasWinCFI(HasWinCFI);
2403}
2404
2406 const MachineFunction &MF) const {
2407 // We can't use LEA instructions for adjusting the stack pointer if we don't
2408 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2409 // to deallocate the stack.
2410 // This means that we can use LEA for SP in two situations:
2411 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2412 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2413 return !MF.getTarget().getMCAsmInfo().usesWindowsCFI() || hasFP(MF);
2414}
2415
2417 switch (MI.getOpcode()) {
2418 case X86::CATCHRET:
2419 case X86::CLEANUPRET:
2420 return true;
2421 default:
2422 return false;
2423 }
2424 llvm_unreachable("impossible");
2425}
2426
2427// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2428// stack. It holds a pointer to the bottom of the root function frame. The
2429// establisher frame pointer passed to a nested funclet may point to the
2430// (mostly empty) frame of its parent funclet, but it will need to find
2431// the frame of the root function to access locals. To facilitate this,
2432// every funclet copies the pointer to the bottom of the root function
2433// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2434// same offset for the PSPSym in the root function frame that's used in the
2435// funclets' frames allows each funclet to dynamically accept any ancestor
2436// frame as its establisher argument (the runtime doesn't guarantee the
2437// immediate parent for some reason lost to history), and also allows the GC,
2438// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2439// frame with only a single offset reported for the entire method.
2440unsigned
2441X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2442 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2444 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2445 /*IgnoreSPUpdates*/ true)
2446 .getFixed();
2447 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2448 return static_cast<unsigned>(Offset);
2449}
2450
2451unsigned
2452X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2453 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2454 // This is the size of the pushed CSRs.
2455 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2456 // This is the size of callee saved XMMs.
2457 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2458 unsigned XMMSize =
2459 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2460 // This is the amount of stack a funclet needs to allocate.
2461 unsigned UsedSize;
2462 EHPersonality Personality =
2464 if (Personality == EHPersonality::CoreCLR) {
2465 // CLR funclets need to hold enough space to include the PSPSym, at the
2466 // same offset from the stack pointer (immediately after the prolog) as it
2467 // resides at in the main function.
2468 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2469 } else {
2470 // Other funclets just need enough stack for outgoing call arguments.
2471 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2472 }
2473 // RBP is not included in the callee saved register block. After pushing RBP,
2474 // everything is 16 byte aligned. Everything we allocate before an outgoing
2475 // call must also be 16 byte aligned.
2476 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2477 // Subtract out the size of the callee saved registers. This is how much stack
2478 // each funclet will allocate.
2479 return FrameSizeMinusRBP + XMMSize - CSSize;
2480}
2481
2482static bool isTailCallOpcode(unsigned Opc) {
2483 return Opc == X86::TCRETURNri || Opc == X86::TCRETURN_WIN64ri ||
2484 Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNdi ||
2485 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2486 Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
2487 Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURN_WINmi64;
2488}
2489
2491 MachineBasicBlock &MBB) const {
// Emits the epilogue for MBB. New instructions are inserted ahead of the
// block's first terminator: the optional argument-base-pointer LEA, the
// frame-pointer pop, the stack-pointer restore, and the accompanying DWARF
// CFI / Windows SEH pseudo instructions.
// NOTE(review): the embedded line numbers of this listing skip in places
// (e.g. 2493, 2579, 2771), so names such as X86FI, AfterPop and PI are used
// below without a visible declaration - confirm against the full source.
2492 const MachineFrameInfo &MFI = MF.getFrameInfo();
2494 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2495 MachineBasicBlock::iterator MBBI = Terminator;
2496 DebugLoc DL;
2497 if (MBBI != MBB.end())
2498 DL = MBBI->getDebugLoc();
2499 // standard x86_64 uses 64-bit frame/stack pointers, x32 - 32-bit.
2500 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2501 Register FramePtr = TRI->getFrameRegister(MF);
2502 Register MachineFramePtr =
2503 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2504
2505 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo().usesWindowsCFI();
2506 bool NeedsWin64CFI =
2507 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2508 // For V3 unwind, epilog SEH pseudos are emitted inline before each
2509 // unwind-effecting instruction.
2510 bool IsWin64UnwindV3 =
2511 NeedsWin64CFI && MF.hasWinCFI() &&
// A block with no terminator cannot end in a funclet return.
2514 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2515
2516 // Get the number of bytes to allocate from the FrameInfo.
2517 uint64_t StackSize = MFI.getStackSize();
2518 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2519 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
// The return-address delta is asserted to be <= 0 near the end of this
// function, so its negation is the non-negative reserved size.
2520 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2521 bool HasFP = hasFP(MF);
2522 uint64_t NumBytes = 0;
2523
2524 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2526 !MF.getTarget().getTargetTriple().isUEFI()) &&
2527 MF.needsFrameMoves();
2528
2529 Register ArgBaseReg;
2530 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2531 unsigned Opc = X86::LEA32r;
2532 Register StackReg = X86::ESP;
2533 ArgBaseReg = MI->getOperand(0).getReg();
2534 if (STI.is64Bit()) {
2535 Opc = X86::LEA64r;
2536 StackReg = X86::RSP;
2537 }
2538 // leal -4(%basereg), %esp
2539 // .cfi_def_cfa %esp, 4
2540 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2541 .addUse(ArgBaseReg)
2542 .addImm(1)
2543 .addUse(X86::NoRegister)
2544 .addImm(-(int64_t)SlotSize)
2545 .addUse(X86::NoRegister)
2547 if (NeedsDwarfCFI) {
2548 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2549 BuildCFI(MBB, MBBI, DL,
2550 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
// Step back over the CFI instruction that was just inserted.
2552 --MBBI;
2553 }
// Step back over the LEA so that later insertions land in front of it.
2554 --MBBI;
2555 }
2556
2557 if (IsFunclet) {
2558 assert(HasFP && "EH funclets without FP not yet implemented");
2559 NumBytes = getWinEHFuncletFrameSize(MF);
2560 } else if (HasFP) {
2561 // Calculate required stack adjustment.
2562 uint64_t FrameSize = StackSize - SlotSize;
2563 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2564
2565 // Callee-saved registers were pushed on stack before the stack was
2566 // realigned.
2567 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2568 NumBytes = alignTo(FrameSize, MaxAlign);
2569 } else {
2570 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2571 }
// Snapshot of the allocation size, taken before mergeSPAdd below may
// replace NumBytes.
2572 uint64_t SEHStackAllocAmt = NumBytes;
2573
2574 unsigned SEHFrameOffset = 0;
2575 if (IsWin64Prologue && HasFP)
2576 SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2577
2578 // AfterPop is the position to insert .cfi_restore.
2580 if (HasFP) {
2581 if (X86FI->hasSwiftAsyncContext()) {
2582 // Discard the context.
2583 int64_t Offset = mergeSPAdd(MBB, MBBI, 16, true);
2584 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2585 }
2586 // Pop EBP.
2587 if (IsWin64UnwindV3)
2588 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
2591 BuildMI(MBB, MBBI, DL,
2593 MachineFramePtr)
2595
2596 // We need to reset FP to its untagged state on return. Bit 60 is currently
2597 // used to show the presence of an extended frame.
2598 if (X86FI->hasSwiftAsyncContext()) {
2599 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2600 .addUse(MachineFramePtr)
2601 .addImm(60)
2603 }
2604
2605 if (NeedsDwarfCFI) {
2606 if (!ArgBaseReg.isValid()) {
2607 unsigned DwarfStackPtr =
2608 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2609 BuildCFI(MBB, MBBI, DL,
2610 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2612 }
2613 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2614 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2615 BuildCFI(MBB, AfterPop, DL,
2616 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2618 --MBBI;
2619 --AfterPop;
2620 }
2621 --MBBI;
2622 }
2623 }
2624
2625 MachineBasicBlock::iterator FirstCSPop = MBBI;
2626 // Skip the callee-saved pop instructions.
// The scan walks backwards from MBBI. DBG_VALUE and terminator instructions
// are stepped over; any other instruction ends the scan unless it carries
// the FrameDestroy flag and has one of the opcodes listed below. FirstCSPop
// tracks the earliest accepted instruction.
2627 while (MBBI != MBB.begin()) {
2628 MachineBasicBlock::iterator PI = std::prev(MBBI);
2629 unsigned Opc = PI->getOpcode();
2630
2631 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2632 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2633 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2634 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2635 Opc != X86::POP2P && Opc != X86::LEA64r && Opc != X86::SEH_PushReg &&
2636 Opc != X86::SEH_Push2Regs && Opc != X86::SEH_StackAlloc &&
2637 Opc != X86::ADD64ri32_NF))
2638 break;
2639 FirstCSPop = PI;
2640 }
2641
2642 --MBBI;
2643 }
2644 if (ArgBaseReg.isValid()) {
2645 // Restore argument base pointer.
2646 auto *MI = X86FI->getStackPtrSaveMI();
2647 int FI = MI->getOperand(1).getIndex();
2648 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2649 // movl offset(%ebp), %basereg
2650 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2652 }
2653 MBBI = FirstCSPop;
2654
2655 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2656 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2657
2658 if (MBBI != MBB.end())
2659 DL = MBBI->getDebugLoc();
2660 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2661 // instruction, merge the two instructions.
2662 if (NumBytes || MFI.hasVarSizedObjects())
2663 NumBytes = mergeSPAdd(MBB, MBBI, NumBytes, true);
2664
2665 if (IsWin64UnwindV3 && NeedsWin64CFI && MF.hasWinCFI()) {
2666 // Find the XMM restores that were tagged with FrameDestroy, now that we
2667 // know the offset we can emit the SEH pseudos for them.
2668 auto EpilogStart = MBBI;
2669 {
2670 auto ScanIt = MBBI;
2671 while (ScanIt != MBB.begin()) {
2672 auto PI = std::prev(ScanIt);
2673 int FI;
2674 if (PI->getFlag(MachineInstr::FrameDestroy) &&
2675 TII.isLoadFromStackSlot(*PI, FI)) {
2676 Register Reg = PI->getOperand(0).getReg();
2677 if (X86::FR64RegClass.contains(Reg)) {
2678 Register IgnoredFrameReg;
2679 int Offset =
2680 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2681 SEHFrameOffset;
2682 BuildMI(MBB, PI, DL, TII.get(X86::SEH_SaveXMM))
2683 .addImm(Reg)
2684 .addImm(Offset)
2686 // std::prev(PI) is the SEH_SaveXMM we just inserted (before PI).
2687 // We start ScanIt from that point so that the next
2688 // std::prev(ScanIt) will examine the instruction before the pseudo,
2689 // i.e. the next potential XMM restore further up the block.
2690 EpilogStart = std::prev(PI);
2691 ScanIt = EpilogStart;
2692 continue;
2693 }
2694 }
// The first instruction that is not an FR64 FrameDestroy reload ends the scan.
2695 break;
2696 }
2697 }
2698
2699 // For V3, SEH_BeginEpilogue must be emitted before any epilog SEH pseudos.
2700 BuildMI(MBB, EpilogStart, DL, TII.get(X86::SEH_BeginEpilogue));
2701 }
2702
2703 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2704 // slot before popping them off! Same applies for the case, when stack was
2705 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2706 // will not do realignment or dynamic stack allocation.
2707 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2708 !IsFunclet) {
2709 if (TRI->hasStackRealignment(MF))
2710 MBBI = FirstCSPop;
// NOTE(review): CSSize is unsigned, so -CSSize wraps modulo 2^32 before it
// is widened to uint64_t. It only acts as a negative displacement if the
// consumer narrows it back to a 32-bit signed offset - confirm.
2711 uint64_t LEAAmount =
2712 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2713
2714 if (X86FI->hasSwiftAsyncContext())
2715 LEAAmount -= 16;
2716
2717 // There are only two legal forms of epilogue:
2718 // - add SEHAllocationSize, %rsp
2719 // - lea SEHAllocationSize(%FramePtr), %rsp
2720 //
2721 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2722 // However, we may use this sequence if we have a frame pointer because the
2723 // effects of the prologue can safely be undone.
2724 if (IsWin64UnwindV3) {
2725 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2727 .addImm(SEHFrameOffset)
2729 if (SEHStackAllocAmt)
2730 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2731 .addImm(SEHStackAllocAmt)
2733 }
2734 if (LEAAmount != 0) {
2737 false, LEAAmount);
2738 --MBBI;
2739 } else {
2740 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2742 --MBBI;
2743 }
2744 } else if (NumBytes) {
2745 // Adjust stack pointer back: ESP += numbytes.
2746 if (IsWin64UnwindV3)
2747 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2748 .addImm(NumBytes)
2750 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2751 if (!HasFP && NeedsDwarfCFI) {
2752 // Define the current CFA rule to use the provided offset.
2753 BuildCFI(MBB, MBBI, DL,
2755 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2757 }
2758 --MBBI;
2759 }
2760
2761 // For V1/V2, emit SEH_BeginEpilogue after stack restore code.
2762 if (!IsWin64UnwindV3 && NeedsWin64CFI && MF.hasWinCFI())
2763 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_BeginEpilogue));
2764
2765 if (!HasFP && NeedsDwarfCFI) {
2766 MBBI = FirstCSPop;
2767 int64_t Offset = -(int64_t)CSSize - SlotSize;
2768 // Mark callee-saved pop instruction.
2769 // Define the current CFA rule to use the provided offset.
2770 while (MBBI != MBB.end()) {
2772 unsigned Opc = PI->getOpcode();
2773 ++MBBI;
2774 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2775 Opc == X86::POP2 || Opc == X86::POP2P) {
2776 Offset += SlotSize;
2777 // Compared to pop, pop2 introduces more stack offset (one more
2778 // register).
2779 if (Opc == X86::POP2 || Opc == X86::POP2P)
2780 Offset += SlotSize;
2781 BuildCFI(MBB, MBBI, DL,
2784 }
2785 }
2786 }
2787
2788 // Emit DWARF info specifying the restores of the callee-saved registers.
2789 // For epilogue with return inside or being other block without successor,
2790 // no need to generate .cfi_restore for callee-saved registers.
2791 if (NeedsDwarfCFI && !MBB.succ_empty())
2792 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2793
2794 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2795 // Add the return addr area delta back since we are not tail calling.
2796 int64_t Delta = X86FI->getTCReturnAddrDelta();
2797 assert(Delta <= 0 && "TCDelta should never be positive");
2798 if (Delta) {
2799 // Check for possible merge with preceding ADD instruction.
2800 int64_t Offset = mergeSPAdd(MBB, Terminator, -Delta, true);
2801 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2802 }
2803 }
2804
2805 // Emit tilerelease for AMX kernel.
2807 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2808
// The end-of-epilogue marker is inserted directly ahead of the terminator.
2809 if (NeedsWin64CFI && MF.hasWinCFI())
2810 BuildMI(MBB, Terminator, DL, TII.get(X86::SEH_EndEpilogue));
2811}
2812
2814 int FI,
2815 Register &FrameReg) const {
     // Resolves frame index FI to a fixed offset and reports, through FrameReg,
     // which register (frame pointer, base pointer, or stack pointer) that
     // offset is relative to.
2816 const MachineFrameInfo &MFI = MF.getFrameInfo();
2817
2818 bool IsFixed = MFI.isFixedObjectIndex(FI);
2819 // We can't calculate offset from frame pointer if the stack is realigned,
2820 // so enforce usage of stack/base pointer. The base pointer is used when we
2821 // have dynamic allocas in addition to dynamic realignment.
2822 if (TRI->hasBasePointer(MF))
2823 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2824 else if (TRI->hasStackRealignment(MF))
2825 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2826 else
2827 FrameReg = TRI->getFrameRegister(MF);
2828
2829 // Offset will hold the offset from the stack pointer at function entry to the
2830 // object.
2831 // We need to factor in additional offsets applied during the prologue to the
2832 // frame, base, and stack pointer depending on which is used.
2833 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2835 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2836 uint64_t StackSize = MFI.getStackSize();
2837 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo().usesWindowsCFI();
     // Stays zero unless the Win64 prologue path below computes a correction.
2838 int64_t FPDelta = 0;
2839
2840 // In an x86 interrupt, remove the offset we added to account for the return
2841 // address from any stack object allocated in the caller's frame. Interrupts
2842 // do not have a standard return address. Fixed objects in the current frame,
2843 // such as SSE register spills, should not get this treatment.
2845 Offset >= 0) {
2847 }
2848
2849 if (IsWin64Prologue) {
2850 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2851
2852 // Calculate required stack adjustment.
2853 uint64_t FrameSize = StackSize - SlotSize;
2854 // If required, include space for extra hidden slot for stashing base
2855 // pointer.
2856 if (X86FI->getRestoreBasePointer())
2857 FrameSize += SlotSize;
2858 uint64_t NumBytes = FrameSize - CSSize;
2859
2860 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
     // The frame-address index is answered directly as -SEHFrameOffset. Note
     // that the leading `FI &&` test means index 0 never takes this path.
2861 if (FI && FI == X86FI->getFAIndex())
2862 return StackOffset::getFixed(-SEHFrameOffset);
2863
2864 // FPDelta is the offset from the "traditional" FP location of the old base
2865 // pointer followed by return address and the location required by the
2866 // restricted Win64 prologue.
2867 // Add FPDelta to all offsets below that go through the frame pointer.
2868 FPDelta = FrameSize - SEHFrameOffset;
2869 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2870 "FPDelta isn't aligned per the Win64 ABI!");
2871 }
2872
2873 if (FrameReg == TRI->getFramePtr()) {
2874 // Skip saved EBP/RBP
2875 Offset += SlotSize;
2876
2877 // Account for restricted Windows prologue.
2878 Offset += FPDelta;
2879
2880 // Skip the RETADDR move area
     // The delta is non-positive, so subtracting it moves Offset upwards.
2881 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2882 if (TailCallReturnAddrDelta < 0)
2883 Offset -= TailCallReturnAddrDelta;
2884
2886 }
2887
2888 // FrameReg is either the stack pointer or a base pointer. But the base is
2889 // located at the end of the statically known StackSize so the distinction
2890 // doesn't really matter.
     // With realignment or a base pointer in play, check that the final offset
     // still honours the alignment requested for the object.
2891 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2892 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2893 return StackOffset::getFixed(Offset + StackSize);
2894}
2895
2897 Register &FrameReg) const {
     // Frame indices recorded in WinEHXMMSlotInfo are addressed off the stack
     // register; every other index is delegated to getFrameIndexReference.
2898 const MachineFrameInfo &MFI = MF.getFrameInfo();
2900 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2901 const auto it = WinEHXMMSlotInfo.find(FI);
2902
     // Not a recorded XMM slot: use the generic computation and return only its
     // fixed component.
2903 if (it == WinEHXMMSlotInfo.end())
2904 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2905
     // Recorded slot: the result is the maximum call frame size, aligned down to
     // the stack alignment, plus the offset stored for this slot.
2906 FrameReg = TRI->getStackRegister();
2907 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2908 it->second;
2909}
2910
2913 Register &FrameReg,
2914 int Adjustment) const {
     // Always reports the stack register. The returned offset is the object's
     // offset with the local-area offset removed, plus the caller-supplied
     // Adjustment.
2915 const MachineFrameInfo &MFI = MF.getFrameInfo();
2916 FrameReg = TRI->getStackRegister();
2917 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2918 getOffsetOfLocalArea() + Adjustment);
2919}
2920
2923 int FI, Register &FrameReg,
2924 bool IgnoreSPUpdates) const {
     // Tries to express frame index FI relative to the stack pointer as it
     // stands after the prologue. Falls back to getFrameIndexReference in the
     // two situations checked below, where an SP-relative answer is not valid.
2925
2926 const MachineFrameInfo &MFI = MF.getFrameInfo();
2927 // Does not include any dynamic realign.
2928 const uint64_t StackSize = MFI.getStackSize();
2929 // LLVM arranges the stack as follows:
2930 // ...
2931 // ARG2
2932 // ARG1
2933 // RETADDR
2934 // PUSH RBP <-- RBP points here
2935 // PUSH CSRs
2936 // ~~~~~~~ <-- possible stack realignment (non-win64)
2937 // ...
2938 // STACK OBJECTS
2939 // ... <-- RSP after prologue points here
2940 // ~~~~~~~ <-- possible stack realignment (win64)
2941 //
2942 // if (hasVarSizedObjects()):
2943 // ... <-- "base pointer" (ESI/RBX) points here
2944 // DYNAMIC ALLOCAS
2945 // ... <-- RSP points here
2946 //
2947 // Case 1: In the simple case of no stack realignment and no dynamic
2948 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2949 // with fixed offsets from RSP.
2950 //
2951 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2952 // stack objects are addressed with RBP and regular stack objects with RSP.
2953 //
2954 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2955 // to address stack arguments for outgoing calls and nothing else. The "base
2956 // pointer" points to local variables, and RBP points to fixed objects.
2957 //
2958 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2959 // answer we give is relative to the SP after the prologue, and not the
2960 // SP in the middle of the function.
2961
     // Fixed objects under stack realignment (except on Win64) cannot be
     // addressed from SP, so use the generic computation.
2962 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2963 !STI.isTargetWin64())
2964 return getFrameIndexReference(MF, FI, FrameReg);
2965
2966 // If !hasReservedCallFrame the function might have SP adjustment in the
2967 // body. So, even though the offset is statically known, it depends on where
2968 // we are in the function.
2969 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2970 return getFrameIndexReference(MF, FI, FrameReg);
2971
2972 // We don't handle tail calls, and shouldn't be seeing them either.
2974 "we don't handle this case!");
2975
2976 // This is how the math works out:
2977 //
2978 // %rsp grows (i.e. gets lower) left to right. Each box below is
2979 // one word (eight bytes). Obj0 is the stack slot we're trying to
2980 // get to.
2981 //
2982 // ----------------------------------
2983 // | BP | Obj0 | Obj1 | ... | ObjN |
2984 // ----------------------------------
2985 // ^ ^ ^ ^
2986 // A B C E
2987 //
2988 // A is the incoming stack pointer.
2989 // (B - A) is the local area offset (-8 for x86-64) [1]
2990 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2991 //
2992 // |(E - B)| is the StackSize (absolute value, positive). For a
2993 // stack that grows down, this works out to be (B - E). [3]
2994 //
2995 // E is also the value of %rsp after stack has been set up, and we
2996 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2997 // (C - E) == (C - A) - (B - A) + (B - E)
2998 // { Using [1], [2] and [3] above }
2999 // == getObjectOffset - LocalAreaOffset + StackSize
3000
     // StackSize is passed as the adjustment, matching the (B - E) term above.
3001 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
3002}
3003
3006 std::vector<CalleeSavedInfo> &CSI) const {
     // Assigns fixed spill slots to the callee-saved registers in CSI. GPRs are
     // handled first and grow CalleeSavedFrameSize; all remaining registers are
     // then placed at suitably aligned offsets below them. Always returns true.
3007 MachineFrameInfo &MFI = MF.getFrameInfo();
3009
3010 unsigned CalleeSavedFrameSize = 0;
3011 unsigned XMMCalleeSavedFrameSize = 0;
3012 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
     // Running offset of the next slot to hand out; it only ever decreases.
3013 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
3014
3015 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
3016
3017 if (TailCallReturnAddrDelta < 0) {
3018 // create RETURNADDR area
3019 // arg
3020 // arg
3021 // RETADDR
3022 // { ...
3023 // RETADDR area
3024 // ...
3025 // }
3026 // [EBP]
3027 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
3028 TailCallReturnAddrDelta - SlotSize, true);
3029 }
3030
3031 // Spill the BasePtr if it's used.
3032 if (this->TRI->hasBasePointer(MF)) {
3033 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
3034 if (MF.hasEHFunclets()) {
3036 X86FI->setHasSEHFramePtrSave(true);
3037 X86FI->setSEHFramePtrSaveIndex(FI);
3038 }
3039 }
3040
3041 bool IsFPRemovedFromCSI = false;
3042 if (hasFP(MF)) {
3043 // emitPrologue always spills frame register the first thing.
3044 SpillSlotOffset -= SlotSize;
3045 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
3046
3047 // The async context lives directly before the frame pointer, and we
3048 // allocate a second slot to preserve stack alignment.
3049 if (X86FI->hasSwiftAsyncContext()) {
3050 SpillSlotOffset -= SlotSize;
3051 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
3052 SpillSlotOffset -= SlotSize;
3053 }
3054
3055 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
3056 // the frame register, we can delete it from CSI list and not have to worry
3057 // about avoiding it later.
3058 Register FPReg = TRI->getFrameRegister(MF);
     // At most one entry is erased: the loop stops at the first overlap.
3059 for (unsigned i = 0; i < CSI.size(); ++i) {
3060 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
3061 CSI.erase(CSI.begin() + i);
3062 IsFPRemovedFromCSI = true;
3063 break;
3064 }
3065 }
3066 }
3067
3068 // Strategy:
3069 // 1. Use push2 when
3070 // a) number of CSR > 1 if no padding is needed
3071 // b) number of CSR > 2 if padding is needed
3072 // c) stack alignment >= 16 bytes
3073 // 2. When the number of CSR push is odd
3074 // a. Start to use push2 from the 1st push if stack is 16B aligned.
3075 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
3076 // 3. When the number of CSR push is even, start to use push2 from the 1st
3077 // push and make the stack 16B aligned before the push
3078 unsigned NumRegsForPush2 = 0;
3079 if (STI.hasPush2Pop2() && getStackAlignment() >= 16) {
3080 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
3081 return X86::GR64RegClass.contains(I.getReg());
3082 });
3083 bool UsePush2Pop2 = !IsFPRemovedFromCSI ? NumCSGPR > 2 : NumCSGPR > 1;
     // Round down to an even count; when the frame pointer was not removed
     // from CSI, one GPR is left out of the pairing.
3084 NumRegsForPush2 =
3085 UsePush2Pop2
3086 ? alignDown(IsFPRemovedFromCSI ? NumCSGPR : NumCSGPR - 1, 2)
3087 : 0;
3088 }
3089
3090 // Assign slots for GPRs. It increases frame size.
3091 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
3092 MCRegister Reg = I.getReg();
3093
3094 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3095 continue;
3096
3097 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
3098 // or only an odd number of registers in the candidates.
3099 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
3100 (SpillSlotOffset % 16 == 0 ||
3101 X86FI->getNumCandidatesForPush2Pop2() % 2))
3102 X86FI->addCandidateForPush2Pop2(Reg);
3103
3104 SpillSlotOffset -= SlotSize;
3105 CalleeSavedFrameSize += SlotSize;
3106
3107 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
3108 I.setFrameIdx(SlotIndex);
3109 }
3110
3111 // Adjust the offset of spill slot as we know the accurate callee saved frame
3112 // size.
3113 if (X86FI->getRestoreBasePointer()) {
3114 SpillSlotOffset -= SlotSize;
3115 CalleeSavedFrameSize += SlotSize;
3116
3117 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
3118 // TODO: saving the slot index is better?
3119 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
3120 }
3121 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
3122 "Expect even candidates for push2/pop2");
3123 if (X86FI->getNumCandidatesForPush2Pop2())
3124 ++NumFunctionUsingPush2Pop2;
3125 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
3126 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
3127
3128 // Assign slots for XMMs.
     // This loop handles every register the GPR loop above skipped.
3129 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
3130 MCRegister Reg = I.getReg();
3131 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3132 continue;
3133
3135 unsigned Size = TRI->getSpillSize(*RC);
3136 Align Alignment = TRI->getSpillAlign(*RC);
3137 // ensure alignment
     // SpillSlotOffset is negative, so align its magnitude and negate back.
3138 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
3139 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
3140
3141 // spill into slot
3142 SpillSlotOffset -= Size;
3143 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
3144 I.setFrameIdx(SlotIndex);
3145 MFI.ensureMaxAlignment(Alignment);
3146
3147 // Save the start offset and size of XMM in stack frame for funclets.
3148 if (X86::VR128RegClass.contains(Reg)) {
3149 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
3150 XMMCalleeSavedFrameSize += Size;
3151 }
3152 }
3153
3154 return true;
3155}
3156
3160 DebugLoc DL = MBB.findDebugLoc(MI);
     // Emits the callee-saved register saves before MI: push instructions for
     // GPRs (paired when the register is a push2/pop2 candidate) and stack-slot
     // stores for every other register in CSI. Returns true on every path.
3161
3162 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
3163 // for us, and there are no XMM CSRs on Win32.
3164 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
3165 return true;
3166
3167 // Push GPRs. It increases frame size.
3168 const MachineFunction &MF = *MBB.getParent();
3170
3171 // Update LiveIn of the basic block and decide whether we can add a kill flag
3172 // to the use.
3173 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
3174 const MachineRegisterInfo &MRI = MF.getRegInfo();
3175 // Do not set a kill flag on values that are also marked as live-in. This
3176 // happens with the @llvm-returnaddress intrinsic and with arguments
3177 // passed in callee saved registers.
3178 // Omitting the kill flags is conservatively correct even if the live-in
3179 // is not used after all.
3180 if (MRI.isLiveIn(Reg))
3181 return false;
3182 MBB.addLiveIn(Reg);
3183 // Check if any subregister is live-in
3184 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
3185 if (MRI.isLiveIn(*AReg))
3186 return false;
3187 return true;
3188 };
     // Convenience wrapper turning the boolean above into a register-state flag.
3189 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
3190 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
3191 };
3192
     // CSI is walked in reverse; the GPR restore loop walks it forwards.
3193 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
3194 MCRegister Reg = RI->getReg();
3195 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3196 continue;
3197
3198 if (X86FI->isCandidateForPush2Pop2(Reg)) {
     // The next CSI entry is consumed as the second register of the pair.
     // NOTE(review): assumes a following entry always exists for a candidate;
     // there is no bounds check on the advanced iterator here.
3199 MCRegister Reg2 = (++RI)->getReg();
3201 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3202 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3204 } else {
3205 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3206 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3208 }
3209 }
3210
     // The base register is pushed after the GPRs when it must be restorable.
3211 if (X86FI->getRestoreBasePointer()) {
3212 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3213 Register BaseReg = this->TRI->getBaseRegister();
3214 BuildMI(MBB, MI, DL, TII.get(Opc))
3215 .addReg(BaseReg, getKillRegState(true))
3217 }
3218
3219 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
3220 // It can be done by spilling XMMs to stack frame.
3221 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3222 MCRegister Reg = I.getReg();
3223 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3224 continue;
3225
3226 // Add the callee-saved register as live-in. It's killed at the spill.
3227 MBB.addLiveIn(Reg);
3229
3230 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, Register(),
3232 }
3233
3234 return true;
3235}
3236
// Materializes the address of CatchRet's target block in RAX (64-bit) or EAX
// (32-bit), inserting the instruction before MBBI, and marks the target block
// as having its address taken.
3237void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3239 MachineInstr *CatchRet) const {
3240 // SEH shouldn't use catchret.
3242 MBB.getParent()->getFunction().getPersonalityFn())) &&
3243 "SEH should not use CATCHRET");
3244 const DebugLoc &DL = CatchRet->getDebugLoc();
     // Operand 0 of the catchret is the block that execution resumes in.
3245 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3246
3247 // Fill EAX/RAX with the address of the target block.
3248 if (STI.is64Bit()) {
3249 // LEA64r CatchRetTarget(%rip), %rax
3250 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3251 .addReg(X86::RIP)
3252 .addImm(0)
3253 .addReg(0)
3254 .addMBB(CatchRetTarget)
3255 .addReg(0);
3256 } else {
3257 // MOV32ri $CatchRetTarget, %eax
3258 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3259 .addMBB(CatchRetTarget);
3260 }
3261
3262 // Record that we've taken the address of CatchRetTarget and no longer just
3263 // reference it in a terminator.
3264 CatchRetTarget->setMachineBlockAddressTaken();
3265}
3266
3270 if (CSI.empty())
3271 return false;
     // Emits the callee-saved register restores before MI: reloads from stack
     // slots for non-GPRs, then pops for the base register (if saved) and the
     // GPRs. Returns false only when CSI is empty.
3272
3273 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3274 // Don't restore CSRs in 32-bit EH funclets. Matches
3275 // spillCalleeSavedRegisters.
3276 if (STI.is32Bit())
3277 return true;
3278 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3279 // funclets. emitEpilogue transforms these to normal jumps.
3280 if (MI->getOpcode() == X86::CATCHRET) {
3281 const Function &F = MBB.getParent()->getFunction();
3282 bool IsSEH = isAsynchronousEHPersonality(
3283 classifyEHPersonality(F.getPersonalityFn()));
3284 if (IsSEH)
3285 return true;
3286 }
3287 }
3288
3289 DebugLoc DL = MBB.findDebugLoc(MI);
3290 MachineFunction &MF = *MBB.getParent();
3292
     // When IsWin64UnwindV3 is set, each pop below is preceded by an SEH_*
     // pseudo instruction naming the register(s) being popped.
3293 bool NeedsWin64CFI =
3294 isWin64Prologue(MF) && MF.getFunction().needsUnwindTableEntry();
3295 bool IsWin64UnwindV3 =
3296 NeedsWin64CFI && MF.getFunction().getParent()->getWinX64EHUnwindMode() ==
3298
3299 // Reload XMMs from stack frame.
3300 for (const CalleeSavedInfo &I : CSI) {
3301 MCRegister Reg = I.getReg();
3302 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3303 continue;
3304
3306 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, Register(), 0,
3308 }
3309
3310 // Clear the stack slot for spill base pointer register.
3311 if (X86FI->getRestoreBasePointer()) {
3312 if (IsWin64UnwindV3)
3313 BuildMI(MBB, MI, DL, TII.get(X86::SEH_PushReg))
3314 .addImm(this->TRI->getBaseRegister())
3316 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3317 Register BaseReg = this->TRI->getBaseRegister();
3318 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3320 }
3321
3322 // POP GPRs.
     // CSI is walked forwards here, the opposite direction to the push loop in
     // the spill routine.
3323 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3324 MCRegister Reg = I->getReg();
3325 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3326 continue;
3327
3328 if (X86FI->isCandidateForPush2Pop2(Reg)) {
     // The next CSI entry is consumed as the second register of the pair.
     // NOTE(review): assumes a following entry always exists for a candidate;
     // there is no bounds check on the advanced iterator here.
3329 MCRegister Reg2 = (++I)->getReg();
3330 if (IsWin64UnwindV3) {
3331 BuildMI(MBB, MI, DL, TII.get(X86::SEH_Push2Regs))
3332 .addImm(Reg)
3333 .addImm(Reg2)
3335 }
3336 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3337 .addReg(Reg2, RegState::Define)
3339 } else {
3340 if (IsWin64UnwindV3)
3341 BuildMI(MBB, MI, DL, TII.get(X86::SEH_PushReg))
3342 .addImm(Reg)
3344 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3346 }
3347 }
3348
3349 return true;
3350}
3351
3353 BitVector &SavedRegs,
3354 RegScavenger *RS) const {
     // Adjusts SavedRegs for this target: adds the base pointer when the
     // function uses one, then drops every register the user has reserved.
3356
3357 // Spill the BasePtr if it's used.
3358 if (TRI->hasBasePointer(MF)) {
3359 Register BasePtr = TRI->getBaseRegister();
     // For the 64-bit ILP32 target, save the 64-bit super-register instead.
3360 if (STI.isTarget64BitILP32())
3361 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3362 SavedRegs.set(BasePtr);
3363 }
3364 if (STI.hasUserReservedRegisters()) {
     // Visit only the bits currently set; clearing the bit just visited does
     // not disturb find_next, which searches strictly after Reg.
3365 for (int Reg = SavedRegs.find_first(); Reg != -1;
3366 Reg = SavedRegs.find_next(Reg)) {
3367 if (STI.isRegisterReservedByUser(Reg)) {
3368 SavedRegs.reset(Reg);
3369 }
3370 }
3371 }
3372}
3373
3374static bool HasNestArgument(const MachineFunction *MF) {
3375 const Function &F = MF->getFunction();
3376 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3377 I++) {
3378 if (I->hasNestAttr() && !I->use_empty())
3379 return true;
3380 }
3381 return false;
3382}
3383
3384/// GetScratchRegister - Get a temp register for performing work in the
3385/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3386/// and the properties of the function either one or two registers will be
3387/// needed. Set primary to true for the first register, false for the second.
3388static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3389 const MachineFunction &MF, bool Primary) {
3390 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3391
3392 // Erlang stuff.
3393 if (CallingConvention == CallingConv::HiPE) {
3394 if (Is64Bit)
3395 return Primary ? X86::R14 : X86::R13;
3396 else
3397 return Primary ? X86::EBX : X86::EDI;
3398 }
3399
3400 if (Is64Bit) {
3401 if (IsLP64)
3402 return Primary ? X86::R11 : X86::R12;
3403 else
3404 return Primary ? X86::R11D : X86::R12D;
3405 }
3406
3407 bool IsNested = HasNestArgument(&MF);
3408
3409 if (CallingConvention == CallingConv::X86_FastCall ||
3410 CallingConvention == CallingConv::Fast ||
3411 CallingConvention == CallingConv::Tail) {
3412 if (IsNested)
3413 report_fatal_error("Segmented stacks does not support fastcall with "
3414 "nested function.");
3415 return Primary ? X86::EAX : X86::ECX;
3416 }
3417 if (IsNested)
3418 return Primary ? X86::EDX : X86::EAX;
3419 return Primary ? X86::ECX : X86::EAX;
3420}
3421
3422// The stack limit in the TCB is set to this many bytes above the actual stack
3423// limit.
3425
3427 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
     // Places two new blocks in front of PrologueMBB. checkMBB compares the
     // stack pointer (minus the frame size unless the frame is small) against
     // a limit read through a segment register and branches to PrologueMBB when
     // there is enough room. allocMBB passes the frame and argument sizes to
     // __morestack and then continues to PrologueMBB.
3428 MachineFrameInfo &MFI = MF.getFrameInfo();
3429 uint64_t StackSize;
3430 unsigned TlsReg, TlsOffset;
3431 DebugLoc DL;
3432
3433 // To support shrink-wrapping we would need to insert the new blocks
3434 // at the right place and update the branches to PrologueMBB.
3435 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3436
3437 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3438 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3439 "Scratch register is live-in");
3440
3441 if (MF.getFunction().isVarArg())
3442 report_fatal_error("Segmented stacks do not support vararg functions.");
3443 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3444 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3445 !STI.isTargetDragonFly())
3446 report_fatal_error("Segmented stacks not supported on this platform.");
3447
3448 // Eventually StackSize will be calculated by a link-time pass; which will
3449 // also decide whether checking code needs to be injected into this particular
3450 // prologue.
3451 StackSize = MFI.getStackSize();
3452
     // Nothing to emit when the frame info says no split-stack prologue is
     // needed for this function.
3453 if (!MFI.needsSplitStackProlog())
3454 return;
3455
3459 bool IsNested = false;
3460
3461 // We need to know if the function has a nest argument only in 64 bit mode.
3462 if (Is64Bit)
3463 IsNested = HasNestArgument(&MF);
3464
3465 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3466 // allocMBB needs to be last (terminating) instruction.
3467
     // Both new blocks inherit the live-ins of the original prologue block.
3468 for (const auto &LI : PrologueMBB.liveins()) {
3469 allocMBB->addLiveIn(LI);
3470 checkMBB->addLiveIn(LI);
3471 }
3472
3473 if (IsNested)
3474 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3475
     // Order matters: after these two push_front calls the layout is
     // checkMBB, allocMBB, then the original blocks.
3476 MF.push_front(allocMBB);
3477 MF.push_front(checkMBB);
3478
3479 // When the frame size is less than 256 we just compare the stack
3480 // boundary directly to the value of the stack pointer, per gcc.
3481 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3482
3483 // Read the limit off the current stacklet off the stack_guard location.
3484 if (Is64Bit) {
3485 if (STI.isTargetLinux()) {
3486 TlsReg = X86::FS;
3487 TlsOffset = IsLP64 ? 0x70 : 0x40;
3488 } else if (STI.isTargetDarwin()) {
3489 TlsReg = X86::GS;
3490 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3491 } else if (STI.isTargetWin64()) {
3492 TlsReg = X86::GS;
3493 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3494 } else if (STI.isTargetFreeBSD()) {
3495 TlsReg = X86::FS;
3496 TlsOffset = 0x18;
3497 } else if (STI.isTargetDragonFly()) {
3498 TlsReg = X86::FS;
3499 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3500 } else {
3501 report_fatal_error("Segmented stacks not supported on this platform.");
3502 }
3503
     // Small frame: compare SP itself. Otherwise compute SP - StackSize into
     // the scratch register and compare that.
3504 if (CompareStackPointer)
3505 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3506 else
3507 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3508 ScratchReg)
3509 .addReg(X86::RSP)
3510 .addImm(1)
3511 .addReg(0)
3512 .addImm(-StackSize)
3513 .addReg(0);
3514
     // Compare against the limit stored at TlsReg:TlsOffset.
3515 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3516 .addReg(ScratchReg)
3517 .addReg(0)
3518 .addImm(1)
3519 .addReg(0)
3520 .addImm(TlsOffset)
3521 .addReg(TlsReg);
3522 } else {
3523 if (STI.isTargetLinux()) {
3524 TlsReg = X86::GS;
3525 TlsOffset = 0x30;
3526 } else if (STI.isTargetDarwin()) {
3527 TlsReg = X86::GS;
3528 TlsOffset = 0x48 + 90 * 4;
3529 } else if (STI.isTargetWin32()) {
3530 TlsReg = X86::FS;
3531 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3532 } else if (STI.isTargetDragonFly()) {
3533 TlsReg = X86::FS;
3534 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3535 } else if (STI.isTargetFreeBSD()) {
3536 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3537 } else {
3538 report_fatal_error("Segmented stacks not supported on this platform.");
3539 }
3540
     // Same small-frame/large-frame choice as in the 64-bit branch above.
3541 if (CompareStackPointer)
3542 ScratchReg = X86::ESP;
3543 else
3544 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3545 .addReg(X86::ESP)
3546 .addImm(1)
3547 .addReg(0)
3548 .addImm(-StackSize)
3549 .addReg(0);
3550
3551 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3552 STI.isTargetDragonFly()) {
     // NOTE(review): the scale operand here is 0, whereas the other memory
     // operands built in this function use 1 — confirm this is intentional.
3553 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3554 .addReg(ScratchReg)
3555 .addReg(0)
3556 .addImm(0)
3557 .addReg(0)
3558 .addImm(TlsOffset)
3559 .addReg(TlsReg);
3560 } else if (STI.isTargetDarwin()) {
3561
3562 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3563 unsigned ScratchReg2;
3564 bool SaveScratch2;
3565 if (CompareStackPointer) {
3566 // The primary scratch register is available for holding the TLS offset.
3567 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3568 SaveScratch2 = false;
3569 } else {
3570 // Need to use a second register to hold the TLS offset
3571 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3572
3573 // Unfortunately, with fastcc the second scratch register may hold an
3574 // argument.
3575 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3576 }
3577
3578 // If Scratch2 is live-in then it needs to be saved.
3579 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3580 "Scratch register is live-in and not saved");
3581
     // A live-in second scratch register is preserved with a push/pop pair
     // around the compare.
3582 if (SaveScratch2)
3583 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3584 .addReg(ScratchReg2, RegState::Kill);
3585
3586 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3587 .addImm(TlsOffset);
3588 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3589 .addReg(ScratchReg)
3590 .addReg(ScratchReg2)
3591 .addImm(1)
3592 .addReg(0)
3593 .addImm(0)
3594 .addReg(TlsReg);
3595
3596 if (SaveScratch2)
3597 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3598 }
3599 }
3600
3601 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3602 // It jumps to normal execution of the function body.
3603 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3604 .addMBB(&PrologueMBB)
3606
3607 // On 32 bit we first push the arguments size and then the frame size. On 64
3608 // bit, we pass the stack frame size in r10 and the argument size in r11.
3609 if (Is64Bit) {
3610 // Functions with nested arguments use R10, so it needs to be saved across
3611 // the call to _morestack
3612
3613 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3614 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3615 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3616 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3617
     // Stash R10 in RAX/EAX before R10 is overwritten with the frame size.
3618 if (IsNested)
3619 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3620
3621 BuildMI(allocMBB, DL, TII.get(X86::getMOVriOpcode(IsLP64, StackSize)),
3622 Reg10)
3623 .addImm(StackSize);
3624 BuildMI(allocMBB, DL,
3626 Reg11)
3627 .addImm(X86FI->getArgumentStackSize());
3628 } else {
3629 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3630 .addImm(X86FI->getArgumentStackSize());
3631 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3632 }
3633
3634 // __morestack is in libgcc
3636 // Under the large code model, we cannot assume that __morestack lives
3637 // within 2^31 bytes of the call site, so we cannot use pc-relative
3638 // addressing. We cannot perform the call via a temporary register,
3639 // as the rax register may be used to store the static chain, and all
3640 // other suitable registers may be either callee-save or used for
3641 // parameter passing. We cannot use the stack at this point either
3642 // because __morestack manipulates the stack directly.
3643 //
3644 // To avoid these issues, perform an indirect call via a read-only memory
3645 // location containing the address.
3646 //
3647 // This solution is not perfect, as it assumes that the .rodata section
3648 // is laid out within 2^31 bytes of each function body, but this seems
3649 // to be sufficient for JIT.
3650 // FIXME: Add retpoline support and remove the error here..
3651 if (STI.useIndirectThunkCalls())
3652 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3653 "code model and thunks not yet implemented.");
3654 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3655 .addReg(X86::RIP)
3656 .addImm(0)
3657 .addReg(0)
3658 .addExternalSymbol("__morestack_addr")
3659 .addReg(0);
3660 } else {
3661 if (Is64Bit)
3662 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3663 .addExternalSymbol("__morestack");
3664 else
3665 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3666 .addExternalSymbol("__morestack");
3667 }
3668
     // The return pseudo differs when R10 has to be restored for a nest
     // argument.
3669 if (IsNested)
3670 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3671 else
3672 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3673
3674 allocMBB->addSuccessor(&PrologueMBB);
3675
     // The allocation path is given zero probability and the direct path to
     // the prologue probability one.
3676 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3677 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3678
3679#ifdef EXPENSIVE_CHECKS
3680 MF.verify();
3681#endif
3682}
3683
3684/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3685/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3686/// to fields it needs, through a named metadata node "hipe.literals" containing
3687/// name-value pairs.
3688static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3689 const StringRef LiteralName) {
3690 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3691 MDNode *Node = HiPELiteralsMD->getOperand(i);
3692 if (Node->getNumOperands() != 2)
3693 continue;
3694 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3695 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3696 if (!NodeName || !NodeVal)
3697 continue;
3698 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3699 if (ValConst && NodeName->getString() == LiteralName) {
3700 return ValConst->getZExtValue();
3701 }
3702 }
3703
3704 report_fatal_error("HiPE literal " + LiteralName +
3705 " required but not provided");
3706}
3707
3708// Return true if there are no non-ehpad successors to MBB and there are no
3709// non-meta instructions between MBBI and MBB.end().
     // A block with no successors at all satisfies the first condition, since
     // all_of over an empty range is true.
3712 return llvm::all_of(
3713 MBB.successors(),
3714 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3715 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3716 return MI.isMetaInstruction();
3717 });
3718}
3719
3720/// Erlang programs may need a special prologue to handle the stack size they
3721/// might need at runtime. That is because Erlang/OTP does not implement a C
3722/// stack but uses a custom implementation of hybrid stack/heap architecture.
3723/// (for more information see Eric Stenman's Ph.D. thesis:
3724/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3725///
3726/// CheckStack:
3727/// temp0 = sp - MaxStack
3728/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3729/// OldStart:
3730/// ...
3731/// IncStack:
3732/// call inc_stack # doubles the stack space
3733/// temp0 = sp - MaxStack
3734/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
///
/// The two extra blocks are only created when the computed MaxStack exceeds
/// the guaranteed space (the *_LEAF_WORDS literal multiplied by the slot
/// size). All runtime parameters come from the module's "hipe.literals" named
/// metadata. PrologueMBB must be the entry block of MF (asserted below).
3736 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3737 MachineFrameInfo &MFI = MF.getFrameInfo();
3738 DebugLoc DL;
3739
3740 // To support shrink-wrapping we would need to insert the new blocks
3741 // at the right place and update the branches to PrologueMBB.
3742 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3743
3744 // HiPE-specific values
3745 NamedMDNode *HiPELiteralsMD =
3746 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3747 if (!HiPELiteralsMD)
3749 "Can't generate HiPE prologue without runtime parameters");
3750 const unsigned HipeLeafWords = getHiPELiteral(
3751 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
 // Arguments beyond this count are treated as stack-passed: each one is
 // charged one slot in the arity computations below.
3752 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3753 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3754 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3755 ? MF.getFunction().arg_size() - CCRegisteredArgs
3756 : 0;
 // Initial estimate: this function's frame, its stack-passed arguments and
 // one additional slot (presumably the return address -- confirm).
3757 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3758
3759 assert(STI.isTargetLinux() &&
3760 "HiPE prologue is only supported on Linux operating systems.");
3761
3762 // Compute the largest caller's frame that is needed to fit the callees'
3763 // frames. This 'MaxStack' is computed from:
3764 //
3765 // a) the fixed frame size, which is the space needed for all spilled temps,
3766 // b) outgoing on-stack parameter areas, and
3767 // c) the minimum stack space this function needs to make available for the
3768 // functions it calls (a tunable ABI property).
3769 if (MFI.hasCalls()) {
3770 unsigned MoreStackForCalls = 0;
3771
3772 for (auto &MBB : MF) {
3773 for (auto &MI : MBB) {
3774 if (!MI.isCall())
3775 continue;
3776
3777 // Get callee operand.
3778 const MachineOperand &MO = MI.getOperand(0);
3779
3780 // Only take account of global function calls (no closures etc.).
3781 if (!MO.isGlobal())
3782 continue;
3783
3784 const Function *F = dyn_cast<Function>(MO.getGlobal());
3785 if (!F)
3786 continue;
3787
3788 // Do not update 'MaxStack' for primitive and built-in functions
3789 // (encoded with names either starting with "erlang."/"bif_" or not
3790 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3791 // "_", such as the BIF "suspend_0") as they are executed on another
3792 // stack.
 // Note: the checks use contains(), so "erlang." and "bif_" are matched
 // anywhere in the name, not only at its start. The last check skips
 // names that contain neither '.' nor '_'.
3793 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3794 F->getName().find_first_of("._") == StringRef::npos)
3795 continue;
3796
3797 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3798 ? F->arg_size() - CCRegisteredArgs
3799 : 0;
 // Keep the largest requirement over all qualifying calls. The guard
 // prevents the unsigned subtraction below from wrapping around.
3800 if (HipeLeafWords - 1 > CalleeStkArity)
3801 MoreStackForCalls =
3802 std::max(MoreStackForCalls,
3803 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3804 }
3805 }
3806 MaxStack += MoreStackForCalls;
3807 }
3808
3809 // If the stack frame needed is larger than the guaranteed then runtime checks
3810 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
3811 if (MaxStack > Guaranteed) {
3812 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3813 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3814
 // Both new blocks inherit the live-in set of the original entry block.
3815 for (const auto &LI : PrologueMBB.liveins()) {
3816 stackCheckMBB->addLiveIn(LI);
3817 incStackMBB->addLiveIn(LI);
3818 }
3819
 // Two push_front calls in this order yield the layout
 // stackCheckMBB, incStackMBB, <previous entry block>.
3820 MF.push_front(incStackMBB);
3821 MF.push_front(stackCheckMBB);
3822
3823 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3824 unsigned LEAop, CMPop, CALLop;
3825 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3826 if (Is64Bit) {
3827 SPReg = X86::RSP;
3828 PReg = X86::RBP;
3829 LEAop = X86::LEA64r;
3830 CMPop = X86::CMP64rm;
3831 CALLop = X86::CALL64pcrel32;
3832 } else {
3833 SPReg = X86::ESP;
3834 PReg = X86::EBP;
3835 LEAop = X86::LEA32r;
3836 CMPop = X86::CMP32rm;
3837 CALLop = X86::CALLpcrel32;
3838 }
3839
3840 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3841 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3842 "HiPE prologue scratch register is live-in");
3843
3844 // Create new MBB for StackCheck:
3845 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3846 false, -MaxStack);
3847 // SPLimitOffset is in a fixed heap location (pointed by BP).
3848 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3849 PReg, false, SPLimitOffset);
3850 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3851 .addMBB(&PrologueMBB)
3853
3854 // Create new MBB for IncStack:
3855 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3856 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3857 false, -MaxStack);
3858 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3859 PReg, false, SPLimitOffset);
3860 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3861 .addMBB(incStackMBB)
3863
 // CFG edges: each new block reaches the prologue with weight 99/100 and
 // the stack-growing block with weight 1/100.
3864 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3865 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3866 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3867 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3868 }
3869#ifdef EXPENSIVE_CHECKS
3870 MF.verify();
3871#endif
3872}
3873
/// Try to realize a positive stack adjustment of exactly one or two slots with
/// POP instructions instead of an arithmetic update of the stack pointer.
///
/// Only the case where the instruction just before MBBI is a call whose
/// operand 1 is a register mask is handled. The pops target registers that the
/// call clobbers but does not define, which are treated as dead at this point.
///
/// \returns true if the pops were inserted before MBBI; false if the
/// adjustment is not eligible, in which case nothing is emitted.
3874bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3876 const DebugLoc &DL,
3877 int Offset) const {
3878 if (Offset <= 0)
3879 return false;
3880
 // Pops move the stack pointer in whole slots only.
3881 if (Offset % SlotSize)
3882 return false;
3883
3884 int NumPops = Offset / SlotSize;
3885 // This is only worth it if we have at most 2 pops.
3886 if (NumPops != 1 && NumPops != 2)
3887 return false;
3888
3889 // Handle only the trivial case where the adjustment directly follows
3890 // a call. This is the most common one, anyway.
3891 if (MBBI == MBB.begin())
3892 return false;
3893 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3894 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3895 return false;
3896
3897 unsigned Regs[2];
3898 unsigned FoundRegs = 0;
3899
3900 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3901 const MachineOperand &RegMask = Prev->getOperand(1);
3902
3903 auto &RegClass =
3904 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3905 // Try to find up to NumPops free registers.
3906 for (auto Candidate : RegClass) {
3907 // Poor man's liveness:
3908 // Since we're immediately after a call, any register that is clobbered
3909 // by the call and not defined by it can be considered dead.
3910 if (!RegMask.clobbersPhysReg(Candidate))
3911 continue;
3912
3913 // Don't clobber reserved registers
3914 if (MRI.isReserved(Candidate))
3915 continue;
3916
 // A register defined by the call (directly or through a sub-/super-
 // register) carries a live value and must not be overwritten.
3917 bool IsDef = false;
3918 for (const MachineOperand &MO : Prev->implicit_operands()) {
3919 if (MO.isReg() && MO.isDef() &&
3920 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3921 IsDef = true;
3922 break;
3923 }
3924 }
3925
3926 if (IsDef)
3927 continue;
3928
3929 Regs[FoundRegs++] = Candidate;
3930 if (FoundRegs == (unsigned)NumPops)
3931 break;
3932 }
3933
3934 if (FoundRegs == 0)
3935 return false;
3936
3937 // If we found only one free register, but need two, reuse the same one twice.
3938 while (FoundRegs < (unsigned)NumPops)
3939 Regs[FoundRegs++] = Regs[0];
3940
3941 for (int i = 0; i < NumPops; ++i)
3942 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3943 Regs[i]);
3944
3945 return true;
3946}
3947
 // Erases the call-frame setup/destroy pseudo at I and, where needed, emits
 // the real stack-pointer adjustment and CFI in its place. Every path returns
 // the iterator that followed the erased pseudo.
3951 bool reserveCallFrame = hasReservedCallFrame(MF);
3952 unsigned Opcode = I->getOpcode();
3953 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3954 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3955 uint64_t Amount = TII.getFrameSize(*I);
 // The internally handled amount is only queried for destroy pseudos or for
 // non-empty frames; otherwise it is taken to be zero.
3956 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3957 I = MBB.erase(I);
 // New instructions go after any debug instructions that follow the pseudo.
3958 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3959
3960 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3961 // typically because the function is marked noreturn (abort, throw,
3962 // assert_fail, etc).
3963 if (isDestroy && blockEndIsUnreachable(MBB, I))
3964 return I;
3965
3966 if (!reserveCallFrame) {
3967 // If the stack pointer can be changed after prologue, turn the
3968 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3969 // adjcallstackdown instruction into 'add ESP, <amt>'
3970
3971 // We need to keep the stack aligned properly. To do this, we round the
3972 // amount of space needed for the outgoing arguments up to the next
3973 // alignment boundary.
3974 Amount = alignTo(Amount, getStackAlign());
3975
3976 const Function &F = MF.getFunction();
3977 bool WindowsCFI = MF.getTarget().getMCAsmInfo().usesWindowsCFI();
3978 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3979
3980 // If we have any exception handlers in this function, and we adjust
3981 // the SP before calls, we may need to indicate this to the unwinder
3982 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3983 // Amount == 0, because the preceding function may have set a non-0
3984 // GNU_ARGS_SIZE.
3985 // TODO: We don't need to reset this between subsequent functions,
3986 // if it didn't change.
3987 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3988
3989 if (HasDwarfEHHandlers && !isDestroy &&
3991 BuildCFI(MBB, InsertPos, DL,
3992 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3993
3994 if (Amount == 0)
3995 return I;
3996
3997 // Factor out the amount that gets handled inside the sequence
3998 // (Pushes of argument for frame setup, callee pops for frame destroy)
3999 Amount -= InternalAmt;
4000
4001 // TODO: This is needed only if we require precise CFA.
4002 // If this is a callee-pop calling convention, emit a CFA adjust for
4003 // the amount the callee popped.
4004 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
4005 BuildCFI(MBB, InsertPos, DL,
4006 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
4007
4008 // Add Amount to SP to destroy a frame, or subtract to setup.
4009 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
 // CfaAdjustment starts as this pseudo's own adjustment and is extended by
 // the callback below for every neighbouring SP update that gets merged.
4010 int64_t CfaAdjustment = StackAdjustment;
4011
4012 if (StackAdjustment) {
4013 // Merge with any previous or following adjustment instruction. Note: the
4014 // instructions merged with here do not have CFI, so their stack
4015 // adjustments do not feed into CfaAdjustment
4016
4017 auto CalcCfaAdjust = [&CfaAdjustment](MachineBasicBlock::iterator PI,
4018 int64_t Offset) {
4019 CfaAdjustment += Offset;
4020 };
4021 auto CalcNewOffset = [&StackAdjustment](int64_t Offset) {
4022 return StackAdjustment + Offset;
4023 };
4024 StackAdjustment =
4025 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, true);
4026 StackAdjustment =
4027 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, false);
4028
 // When optimizing for minimum size, prefer pops; fall back to a regular
 // stack adjustment if pops are not applicable.
4029 if (StackAdjustment) {
4030 if (!(F.hasMinSize() &&
4031 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
4032 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
4033 /*InEpilogue=*/false);
4034 }
4035 }
4036
4037 if (DwarfCFI && !hasFP(MF) && CfaAdjustment) {
4038 // If we don't have FP, but need to generate unwind information,
4039 // we need to set the correct CFA offset after the stack adjustment.
4040 // How much we adjust the CFA offset depends on whether we're emitting
4041 // CFI only for EH purposes or for debugging. EH only requires the CFA
4042 // offset to be correct at each call site, while for debugging we want
4043 // it to be more precise.
4044
4045 // TODO: When not using precise CFA, we also need to adjust for the
4046 // InternalAmt here.
4047 BuildCFI(
4048 MBB, InsertPos, DL,
4049 MCCFIInstruction::createAdjustCfaOffset(nullptr, -CfaAdjustment));
4050 }
4051
4052 return I;
4053 }
4054
 // Reserved call frame: the pseudo itself needs no SP update. Only a non-zero
 // internally handled amount is compensated, at the position found by walking
 // backwards until the preceding instruction is a call.
4055 if (InternalAmt) {
4058 while (CI != B && !std::prev(CI)->isCall())
4059 --CI;
4060 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
4061 }
4062
4063 return I;
4064}
4065
 // Decides whether MBB is acceptable based on EFLAGS liveness: if EFLAGS is not
 // live into MBB there is no restriction; otherwise every check below must
 // pass.
4067 assert(MBB.getParent() && "Block is not attached to a function!");
4068 const MachineFunction &MF = *MBB.getParent();
4069 if (!MBB.isLiveIn(X86::EFLAGS))
4070 return true;
4071
4072 // If stack probes have to loop inline or call, that will clobber EFLAGS.
4073 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
4075 const X86TargetLowering &TLI = *STI.getTargetLowering();
4076 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
4077 return false;
4078
 // Stack realignment and a Swift async context also rule the block out
 // (presumably because their setup clobbers EFLAGS as well -- confirm).
4080 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
4081}
4082
 // Decides whether MBB is acceptable as an epilogue location. The checks run
 // from the most restrictive target rule (Win64) down to the EFLAGS question.
4084 assert(MBB.getParent() && "Block is not attached to a function!");
4085
4086 // Win64 has strict requirements in terms of epilogue and we are
4087 // not taking a chance at messing with them.
4088 // I.e., unless this block is already an exit block, we can't use
4089 // it as an epilogue.
4090 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
4091 return false;
4092
4093 // Swift async context epilogue has a BTR instruction that clobbers parts of
4094 // EFLAGS.
4095 const MachineFunction &MF = *MBB.getParent();
4098
 // LEA does not modify EFLAGS, so no liveness check is needed in this case.
4099 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
4100 return true;
4101
4102 // If we cannot use LEA to adjust SP, we may need to use ADD, which
4103 // clobbers the EFLAGS. Check that we do not need to preserve it,
4104 // otherwise, conservatively assume this is not
4105 // safe to insert the epilogue here.
4107}
4108
 // The result is a conjunction: one of nounwind / frame pointer present / no
 // compact unwind must hold, and the function must not use split stacks.
4110 // If we may need to emit frameless compact unwind information, give
4111 // up as this is currently broken: PR25614.
4112 bool CompactUnwind =
4114 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
4115 !CompactUnwind) &&
4116 // The lowering of segmented stack and HiPE only support entry
4117 // blocks as prologue blocks: PR26107. This limitation may be
4118 // lifted if we fix:
4119 // - adjustForSegmentedStacks
4120 // - adjustForHiPEPrologue
4122 !MF.shouldSplitStack();
4123}
4124
 // Emits, before MBBI, the instructions that re-establish the 32-bit frame
 // registers from the EH registration node: optionally ESP (when RestoreSP is
 // set), then EBP, going through ESI when the frame index resolves against the
 // base pointer. Also records the node's end offset in the WinEH function
 // info. Returns MBBI.
4127 const DebugLoc &DL, bool RestoreSP) const {
4128 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
4129 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
4130 assert(STI.is32Bit() && !Uses64BitFramePtr &&
4131 "restoring EBP/ESI on non-32-bit target");
4132
4133 MachineFunction &MF = *MBB.getParent();
4134 Register FramePtr = TRI->getFrameRegister(MF);
4135 Register BasePtr = TRI->getBaseRegister();
4136 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
4138 MachineFrameInfo &MFI = MF.getFrameInfo();
4139
4140 // FIXME: Don't set FrameSetup flag in catchret case.
4141
4142 int FI = FuncInfo.EHRegNodeFrameIndex;
4143 int EHRegSize = MFI.getObjectSize(FI);
4144
4145 if (RestoreSP) {
4146 // MOV32rm -EHRegSize(%ebp), %esp
4147 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
4148 X86::EBP, true, -EHRegSize)
4150 }
4151
 // EndOffset is the negated (offset + size) of the registration node and is
 // stored in FuncInfo.EHRegNodeEndOffset.
4152 Register UsedReg;
4153 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
4154 int EndOffset = -EHRegOffset - EHRegSize;
4155 FuncInfo.EHRegNodeEndOffset = EndOffset;
4156
4157 if (UsedReg == FramePtr) {
4158 // ADD $offset, %ebp
4159 unsigned ADDri = getADDriOpcode(false);
4160 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
4162 .addImm(EndOffset)
4164 ->getOperand(3)
4165 .setIsDead();
4166 assert(EndOffset >= 0 &&
4167 "end of registration object above normal EBP position!");
4168 } else if (UsedReg == BasePtr) {
4169 // LEA offset(%ebp), %esi
4170 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
4171 FramePtr, false, EndOffset)
4173 // MOV32rm SavedEBPOffset(%esi), %ebp
4174 assert(X86FI->getHasSEHFramePtrSave());
4175 int Offset =
4176 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
4177 .getFixed();
4178 assert(UsedReg == BasePtr);
4179 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
4180 UsedReg, true, Offset)
4182 } else {
4183 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
4184 }
4185 return MBBI;
4186}
4187
 // The result is the slot size reported by the register info.
4189 return TRI->getSlotSize();
4190}
4191
4196
 // Chooses between a CFA-relative frame base and a plain register frame base.
 // The CFA form is only considered when the initial CFA register is the frame
 // register.
4200 Register FrameRegister = RI->getFrameRegister(MF);
4201 if (getInitialCFARegister(MF) == FrameRegister &&
4203 DwarfFrameBase FrameBase;
4204 FrameBase.Kind = DwarfFrameBase::CFA;
4205 FrameBase.Location.Offset =
4207 return FrameBase;
4208 }
4209
 // Otherwise the frame base is described directly by the frame register.
4210 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
4211}
4212
4213namespace {
4214// Struct used by orderFrameObjects to help sort the stack objects.
4215struct X86FrameSortingObject {
4216 bool IsValid = false; // true if we care about this Object.
4217 unsigned ObjectIndex = 0; // Index of Object into MFI list.
4218 unsigned ObjectSize = 0; // Size of Object in bytes.
4219 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
4220 unsigned ObjectNumUses = 0; // Object static number of uses.
4221};
4222
4223// The comparison function we use for std::sort to order our local
4224// stack symbols. The current algorithm is to use an estimated
4225// "density". This takes into consideration the size and number of
4226// uses each object has in order to roughly minimize code size.
4227// So, for example, an object of size 16B that is referenced 5 times
4228// will get higher priority than 4 4B objects referenced 1 time each.
4229// It's not perfect and we may be able to squeeze a few more bytes out of
4230// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4231// fringe end can have special consideration, given their size is less
4232// important, etc.), but the algorithmic complexity grows too much to be
4233// worth the extra gains we get. This gets us pretty close.
4234// The final order leaves us with objects with highest priority going
4235// at the end of our list.
4236struct X86FrameSortingComparator {
4237 inline bool operator()(const X86FrameSortingObject &A,
4238 const X86FrameSortingObject &B) const {
4239 uint64_t DensityAScaled, DensityBScaled;
4240
4241 // For consistency in our comparison, all invalid objects are placed
4242 // at the end. This also allows us to stop walking when we hit the
4243 // first invalid item after it's all sorted.
4244 if (!A.IsValid)
4245 return false;
4246 if (!B.IsValid)
4247 return true;
4248
4249 // The density is calculated by doing :
4250 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4251 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4252 // Since this approach may cause inconsistencies in
4253 // the floating point <, >, == comparisons, depending on the floating
4254 // point model with which the compiler was built, we're going
4255 // to scale both sides by multiplying with
4256 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4257 // the division and, with it, the need for any floating point
4258 // arithmetic.
4259 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4260 static_cast<uint64_t>(B.ObjectSize);
4261 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4262 static_cast<uint64_t>(A.ObjectSize);
4263
4264 // If the two densities are equal, prioritize highest alignment
4265 // objects. This allows for similar alignment objects
4266 // to be packed together (given the same density).
4267 // There's room for improvement here, also, since we can pack
4268 // similar alignment (different density) objects next to each
4269 // other to save padding. This will also require further
4270 // complexity/iterations, and the overall gain isn't worth it,
4271 // in general. Something to keep in mind, though.
4272 if (DensityAScaled == DensityBScaled)
4273 return A.ObjectAlignment < B.ObjectAlignment;
4274
4275 return DensityAScaled < DensityBScaled;
4276 }
4277};
4278} // namespace
4279
4280// Order the symbols in the local stack.
4281// We want to place the local stack objects in some sort of sensible order.
4282// The heuristic we use is to try and pack them according to static number
4283// of uses and size of object in order to minimize code size.
// ObjectsToAllocate holds the frame indices to order and is rewritten in
// place with the chosen order; nothing is done when it is empty.
4285 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4286 const MachineFrameInfo &MFI = MF.getFrameInfo();
4287
4288 // Don't waste time if there's nothing to do.
4289 if (ObjectsToAllocate.empty())
4290 return;
4291
4292 // Create an array of all MFI objects. We won't need all of these
4293 // objects, but we're going to create a full array of them to make
4294 // it easier to index into when we're counting "uses" down below.
4295 // We want to be able to easily/cheaply access an object by simply
4296 // indexing into it, instead of having to search for it every time.
4297 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4298
4299 // Walk the objects we care about and mark them as such in our working
4300 // struct.
4301 for (auto &Obj : ObjectsToAllocate) {
4302 SortingObjects[Obj].IsValid = true;
4303 SortingObjects[Obj].ObjectIndex = Obj;
4304 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4305 // Set the size.
4306 int ObjectSize = MFI.getObjectSize(Obj);
4307 if (ObjectSize == 0)
4308 // Variable size. Just use 4.
4309 SortingObjects[Obj].ObjectSize = 4;
4310 else
4311 SortingObjects[Obj].ObjectSize = ObjectSize;
4312 }
4313
4314 // Count the number of uses for each object.
 // Debug instructions are skipped, so they do not contribute to the counts.
4315 for (auto &MBB : MF) {
4316 for (auto &MI : MBB) {
4317 if (MI.isDebugInstr())
4318 continue;
4319 for (const MachineOperand &MO : MI.operands()) {
4320 // Check to see if it's a local stack symbol.
4321 if (!MO.isFI())
4322 continue;
4323 int Index = MO.getIndex();
4324 // Check to see if it falls within our range, and is tagged
4325 // to require ordering.
4326 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4327 SortingObjects[Index].IsValid)
4328 SortingObjects[Index].ObjectNumUses++;
4329 }
4330 }
4331 }
4332
4333 // Sort the objects using X86FrameSortingComparator (see its comment for
4334 // info).
4335 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4336
4337 // Now modify the original list to represent the final order that
4338 // we want. The order will depend on whether we're going to access them
4339 // from the stack pointer or the frame pointer. For SP, the list should
4340 // end up with the END containing objects that we want with smaller offsets.
4341 // For FP, it should be flipped.
4342 int i = 0;
4343 for (auto &Obj : SortingObjects) {
4344 // All invalid items are sorted at the end, so it's safe to stop.
4345 if (!Obj.IsValid)
4346 break;
4347 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4348 }
4349
4350 // Flip it if we're accessing off of the FP.
4351 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4352 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4353}
4354
// Computes an offset as the sum of four parts: the fixed 16-byte home
// location of RDX, one slot for the pushed RBP, the callee-saved register
// area, and the funclet frame size.
4355unsigned
4357 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4358 unsigned Offset = 16;
4359 // RBP is immediately pushed.
4360 Offset += SlotSize;
4361 // All callee-saved registers are then pushed.
4362 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4363 // Every funclet allocates enough stack space for the largest outgoing call.
4364 Offset += getWinEHFuncletFrameSize(MF);
4365 return Offset;
4366}
4367
 // Performs several independent frame preparations for MF, each introduced by
 // its own comment below. RS receives the scavenging frame index when one is
 // created.
4369 MachineFunction &MF, RegScavenger *RS) const {
4370 // Mark the function as not having WinCFI. We will set it back to true in
4371 // emitPrologue if it gets called and emits CFI.
4372 MF.setHasWinCFI(false);
4373
4374 MachineFrameInfo &MFI = MF.getFrameInfo();
4375 // If the frame is big enough that we might need to scavenge a register to
4376 // handle huge offsets, reserve a stack slot for that now.
 // The slot is one SlotSize-sized, SlotSize-aligned stack object.
4377 if (!isInt<32>(MFI.estimateStackSize(MF))) {
4378 int FI = MFI.CreateStackObject(SlotSize, Align(SlotSize), false);
4379 RS->addScavengingFrameIndex(FI);
4380 }
4381
4382 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4383 // aligned. The format doesn't support misaligned stack adjustments.
4386
4387 // If this function isn't doing Win64-style C++ EH, we don't need to do
4388 // anything.
4389 if (STI.is64Bit() && MF.hasEHFunclets() &&
4392 adjustFrameForMsvcCxxEh(MF);
4393 }
4394}
4395
/// Lays out the frame objects needed by Win64 C++ EH: assigns fixed offsets to
/// catch objects that do not have one yet, creates the UnwindHelp slot below
/// them, records its frame index in the WinEH function info, and stores -2
/// into it in the entry block.
4396void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4397 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4398 // relative to RSP after the prologue. Find the offset of the last fixed
4399 // object, so that we can allocate a slot immediately following it. If there
4400 // were no fixed objects, use offset -SlotSize, which is immediately after the
4401 // return address. Fixed objects have negative frame indices.
4402 MachineFrameInfo &MFI = MF.getFrameInfo();
4403 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4404 int64_t MinFixedObjOffset = -SlotSize;
4405 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4406 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4407
 // Place every catch object that has a frame index (INT_MAX marks "none") and
 // still has offset 0 below the lowest offset used so far.
4408 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4409 for (WinEHHandlerType &H : TBME.HandlerArray) {
4410 int FrameIndex = H.CatchObj.FrameIndex;
4411 if ((FrameIndex != INT_MAX) && MFI.getObjectOffset(FrameIndex) == 0) {
4412 // Ensure alignment.
4413 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4414 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4415 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4416 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4417 }
4418 }
4419 }
4420
4421 // Ensure alignment.
 // Round down to a multiple of 8, then reserve one slot for UnwindHelp.
4422 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4423 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4424 int UnwindHelpFI =
4425 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4426 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4427
4428 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4429 // other frame setup instructions.
4430 MachineBasicBlock &MBB = MF.front();
4431 auto MBBI = MBB.begin();
4432 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4433 ++MBBI;
4434
4436 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4437 UnwindHelpFI)
4438 .addImm(-2);
4439}
4440
 // Removes the recorded stack-pointer-save instruction, if there is one, and
 // resets the stored pointer to null. RS is not used by the lines shown here.
4442 MachineFunction &MF, RegScavenger *RS) const {
4443 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4444
4445 if (STI.is32Bit() && MF.hasEHFunclets())
4447 // We have emitted prolog and epilog. Don't need stack pointer saving
4448 // instruction any more.
4449 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4450 MI->eraseFromParent();
4451 X86FI->setStackPtrSaveMI(nullptr);
4452 }
4453}
4454
 // Visits every block of MF and handles those that are EH pads but not funclet
 // entries.
4456 MachineFunction &MF) const {
4457 // 32-bit functions have to restore stack pointers when control is transferred
4458 // back to the parent function. These blocks are identified as eh pads that
4459 // are not funclet entries.
 // IsSEH is forwarded below as the RestoreSP argument, so the stack pointer is
 // only restored for asynchronous EH personalities.
4460 bool IsSEH = isAsynchronousEHPersonality(
4462 for (MachineBasicBlock &MBB : MF) {
4463 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4464 if (NeedsRestore)
4466 /*RestoreSP=*/IsSEH);
4467 }
4468}
4469
4470// Compute the alignment gap between current SP after spilling FP/BP and the
4471// next properly aligned stack offset.
// The gap is the padding needed to round NumSpilledRegs spill slots of class
// RC up to the stack alignment; it is 0 when the spills already fill a whole
// number of alignment units.
4473 const TargetRegisterClass *RC,
4474 unsigned NumSpilledRegs) {
4476 unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4477 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4478 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
4479 return AlignedSize - AllocSize;
4480}
4481
/// Pushes FP and/or BP (whichever is valid, FP first) before BeforeMI, lowers
/// SP by SPAdjust, and, when FP was spilled and DWARF CFI is needed, emits CFI
/// that describes the CFA through the spilled FP value.
///
/// At least one of FP and BP must be valid.
4482void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4484 Register FP, Register BP,
4485 int SPAdjust) const {
4486 assert(FP.isValid() || BP.isValid());
4487
4488 MachineBasicBlock *MBB = BeforeMI->getParent();
4489 DebugLoc DL = BeforeMI->getDebugLoc();
4490
4491 // Spill FP.
4492 if (FP.isValid()) {
4493 BuildMI(*MBB, BeforeMI, DL,
4494 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4495 .addReg(FP);
4496 }
4497
4498 // Spill BP.
4499 if (BP.isValid()) {
4500 BuildMI(*MBB, BeforeMI, DL,
4501 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4502 .addReg(BP);
4503 }
4504
4505 // Make sure SP is aligned.
4506 if (SPAdjust)
4507 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4508
4509 // Emit unwinding information.
4510 if (FP.isValid() && needsDwarfCFI(MF)) {
4511 // Emit .cfi_remember_state to remember old frame.
4512 unsigned CFIIndex =
4514 BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4515 .addCFIIndex(CFIIndex);
4516
4517 // Setup new CFA value with DW_CFA_def_cfa_expression:
4518 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
 // (In the code below the register is the DWARF number of the stack
 // pointer and the constant is SlotSize * 2, i.e. 16 when SlotSize is 8.)
4519 SmallString<64> CfaExpr;
4520 uint8_t buffer[16];
 // Offset is the distance from SP to the spilled FP: the alignment padding
 // plus the BP spill slot, if BP was pushed after FP.
4521 int Offset = SPAdjust;
4522 if (BP.isValid())
4523 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4524 // If BeforeMI is a frame setup instruction, we need to adjust the position
4525 // and offset of the new cfi instruction.
4526 if (TII.isFrameSetup(*BeforeMI)) {
4527 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4528 BeforeMI = std::next(BeforeMI);
4529 }
4530 Register StackPtr = TRI->getStackRegister();
4531 if (STI.isTarget64BitILP32())
4533 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
4534 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4535 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4536 CfaExpr.push_back(dwarf::DW_OP_deref);
4537 CfaExpr.push_back(dwarf::DW_OP_consts);
4538 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4539 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
4540
 // Wrap the expression: opcode, encoded expression length, expression bytes.
4541 SmallString<64> DefCfaExpr;
4542 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4543 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4544 DefCfaExpr.append(CfaExpr.str());
4545 BuildCFI(*MBB, BeforeMI, DL,
4546 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4548 }
4549}
4550
/// Inserts, right after AfterMI, the sequence that undoes a spill of FP and/or
/// BP: SP is raised by SPAdjust, then BP and FP are popped (BP first, i.e. the
/// reverse of the push order). When FP is restored and DWARF CFI is needed, a
/// CFI instruction is emitted to restore the previous unwind state.
///
/// At least one of FP and BP must be valid.
4551void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4553 Register FP, Register BP,
4554 int SPAdjust) const {
4555 assert(FP.isValid() || BP.isValid());
4556
4557 // Adjust SP so it points to spilled FP or BP.
4558 MachineBasicBlock *MBB = AfterMI->getParent();
4559 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4560 DebugLoc DL = AfterMI->getDebugLoc();
4561 if (SPAdjust)
4562 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4563
4564 // Restore BP.
4565 if (BP.isValid()) {
4566 BuildMI(*MBB, Pos, DL,
4567 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4568 }
4569
4570 // Restore FP.
4571 if (FP.isValid()) {
4572 BuildMI(*MBB, Pos, DL,
4573 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
4574
4575 // Emit unwinding information.
4576 if (needsDwarfCFI(MF)) {
4577 // Restore original frame with .cfi_restore_state.
4578 unsigned CFIIndex =
4580 BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4581 .addCFIIndex(CFIIndex);
4582 }
4583 }
4584}
4585
/// Brackets the instruction range [BeforeMI, AfterMI] with a spill of FP
/// and/or BP before BeforeMI and the matching restore after AfterMI. SpillFP
/// and SpillBP select the registers; at least one must be set.
4586void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4588 MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4589 assert(SpillFP || SpillBP);
4590
4591 Register FP, BP;
 // RC ends up as the register class of the last register handled below (BP's
 // class when both are spilled); NumRegs counts the spilled registers.
4592 const TargetRegisterClass *RC;
4593 unsigned NumRegs = 0;
4594
4595 if (SpillFP) {
4596 FP = TRI->getFrameRegister(MF);
4597 if (STI.isTarget64BitILP32())
4599 RC = TRI->getMinimalPhysRegClass(FP);
4600 ++NumRegs;
4601 }
4602 if (SpillBP) {
4603 BP = TRI->getBaseRegister();
4604 if (STI.isTarget64BitILP32())
4605 BP = Register(getX86SubSuperRegister(BP, 64));
4606 RC = TRI->getMinimalPhysRegClass(BP);
4607 ++NumRegs;
4608 }
 // Padding that keeps SP aligned after the pushes; the same value is passed
 // to the spill and the restore so that they cancel out.
4609 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4610
4611 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4612 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4613}
4614
/// Returns true, after advancing MI, when MI points at an
/// LCMPXCHG16B_SAVE_RBX pseudo; returns false and leaves MI untouched for any
/// other opcode.
4615bool X86FrameLowering::skipSpillFPBP(
4617 if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4618 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4619 // SaveRbx = COPY RBX
4620 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4621 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4622 // We should skip this instruction sequence.
4623 int FI;
4624 Register Reg;
 // Advance until MI is either a COPY whose source is RBX or a store of RBX
 // to a stack slot. Since the COPY precedes the pseudo, ++MI presumably
 // walks backwards (reverse iterator) -- confirm against the signature.
4625 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4626 MI->getOperand(1).getReg() == X86::RBX) &&
4627 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4628 ++MI;
4629 return true;
4630 }
4631 return false;
4632}
4633
 // Reports whether MI uses or defines FP and/or BP. AccessFP and AccessBP are
 // always overwritten: each is set to true only if the corresponding register
 // is non-null and MI has a use or def operand for it. The return value is
 // true if either flag is set.
4635 const TargetRegisterInfo *TRI, bool &AccessFP,
4636 bool &AccessBP) {
4637 AccessFP = AccessBP = false;
4638 if (FP) {
4639 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4640 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4641 AccessFP = true;
4642 }
4643 if (BP) {
4644 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4645 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4646 AccessBP = true;
4647 }
4648 return AccessFP || AccessBP;
4649}
4650
4651// Invoke instruction has been lowered to normal function call. We try to figure
4652// out if MI comes from Invoke.
4653// Do we have any better method?
4654static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4655 if (!MI.isCall())
4656 return false;
4657 if (InsideEHLabels)
4658 return true;
4659
4660 const MachineBasicBlock *MBB = MI.getParent();
4661 if (!MBB->hasEHPadSuccessor())
4662 return false;
4663
4664 // Check if there is another call instruction from MI to the end of MBB.
4666 for (++MBBI; MBBI != ME; ++MBBI)
4667 if (MBBI->isCall())
4668 return false;
4669 return true;
4670}
4671
4672/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4673/// interfered stack access in the range, usually generated by register spill.
4674void X86FrameLowering::checkInterferedAccess(
4676 MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4677 bool SpillBP) const {
4678 if (DefMI == KillMI)
4679 return;
4680 if (TRI->hasBasePointer(MF)) {
4681 if (!SpillBP)
4682 return;
4683 } else {
4684 if (!SpillFP)
4685 return;
4686 }
4687
4688 auto MI = KillMI;
4689 while (MI != DefMI) {
4690 if (any_of(MI->operands(),
4691 [](const MachineOperand &MO) { return MO.isFI(); }))
4692 MF.getContext().reportError(SMLoc(),
4693 "Interference usage of base pointer/frame "
4694 "pointer.");
4695 MI++;
4696 }
4697}
4698
4699/// If a function uses base pointer and the base pointer is clobbered by inline
4700/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
4701/// contains garbage value.
4702/// For example if a 32b x86 function uses base pointer esi, and esi is
4703/// clobbered by following inline asm
4704/// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4705/// We need to save esi before the asm and restore it after the asm.
4706///
4707/// The problem can also occur to frame pointer if there is a function call, and
4708/// the callee uses a different calling convention and clobbers the fp.
4709///
4710/// Because normal frame objects (spill slots) are accessed through fp/bp
4711/// register, so we can't spill fp/bp to normal spill slots.
4712///
4713/// FIXME: There are 2 possible enhancements:
4714/// 1. In many cases there are different physical registers not clobbered by
4715/// inline asm, we can use one of them as base pointer. Or use a virtual
4716/// register as base pointer and let RA allocate a physical register to it.
4717/// 2. If there is no other instructions access stack with fp/bp from the
4718/// inline asm to the epilog, and no cfi requirement for a correct fp, we can
4719/// skip the save and restore operations.
4721 Register FP, BP;
4723 if (TFI.hasFP(MF))
4724 FP = TRI->getFrameRegister(MF);
4725 if (TRI->hasBasePointer(MF))
4726 BP = TRI->getBaseRegister();
4727
4728 // Currently only inline asm and function call can clobbers fp/bp. So we can
4729 // do some quick test and return early.
4730 if (!MF.hasInlineAsm()) {
4732 if (!X86FI->getFPClobberedByCall())
4733 FP = 0;
4734 if (!X86FI->getBPClobberedByCall())
4735 BP = 0;
4736 }
4737 if (!FP && !BP)
4738 return;
4739
4740 for (MachineBasicBlock &MBB : MF) {
4741 bool InsideEHLabels = false;
4742 auto MI = MBB.rbegin(), ME = MBB.rend();
4743 auto TermMI = MBB.getFirstTerminator();
4744 if (TermMI == MBB.begin())
4745 continue;
4746 MI = *(std::prev(TermMI));
4747
4748 while (MI != ME) {
4749 // Skip frame setup/destroy instructions.
4750 // Skip Invoke (call inside try block) instructions.
4751 // Skip instructions handled by target.
4752 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4754 isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4755 ++MI;
4756 continue;
4757 }
4758
4759 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4760 InsideEHLabels = !InsideEHLabels;
4761 ++MI;
4762 continue;
4763 }
4764
4765 bool AccessFP, AccessBP;
4766 // Check if fp or bp is used in MI.
4767 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4768 ++MI;
4769 continue;
4770 }
4771
4772 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4773 // used.
4774 bool FPLive = false, BPLive = false;
4775 bool SpillFP = false, SpillBP = false;
4776 auto DefMI = MI, KillMI = MI;
4777 do {
4778 SpillFP |= AccessFP;
4779 SpillBP |= AccessBP;
4780
4781 // Maintain FPLive and BPLive.
4782 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4783 FPLive = false;
4784 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4785 FPLive = true;
4786 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4787 BPLive = false;
4788 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4789 BPLive = true;
4790
4791 DefMI = MI++;
4792 } while ((MI != ME) &&
4793 (FPLive || BPLive ||
4794 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4795
4796 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4797 if (FPLive && !SpillBP)
4798 continue;
4799
4800 // If the bp is clobbered by a call, we should save and restore outside of
4801 // the frame setup instructions.
4802 if (KillMI->isCall() && DefMI != ME) {
4803 auto FrameSetup = std::next(DefMI);
4804 // Look for frame setup instruction toward the start of the BB.
4805 // If we reach another call instruction, it means no frame setup
4806 // instruction for the current call instruction.
4807 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4808 !FrameSetup->isCall())
4809 ++FrameSetup;
4810 // If a frame setup instruction is found, we need to find out the
4811 // corresponding frame destroy instruction.
4812 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
4813 (TII.getFrameSize(*FrameSetup) ||
4814 TII.getFrameAdjustment(*FrameSetup))) {
4815 while (!TII.isFrameInstr(*KillMI))
4816 --KillMI;
4817 DefMI = FrameSetup;
4818 MI = DefMI;
4819 ++MI;
4820 }
4821 }
4822
4823 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4824
4825 // Call target function to spill and restore FP and BP registers.
4826 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4827 }
4828 }
4829}
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static const TargetRegisterClass * getCalleeSavedSpillRC(MCRegister Reg, const X86Subtarget &STI, const TargetRegisterInfo &TRI)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
constexpr uint64_t MaxSPChunk
static const unsigned FramePtr
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
reverse_iterator rend() const
Definition ArrayRef.h:133
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
reverse_iterator rbegin() const
Definition ArrayRef.h:132
BitVector & reset()
Reset all bits in the bitvector.
Definition BitVector.h:409
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
Definition BitVector.h:317
BitVector & set()
Set all bits in the bitvector.
Definition BitVector.h:366
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
Definition BitVector.h:324
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:159
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information needed to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
A debug info location.
Definition DebugLoc.h:126
unsigned size() const
Definition DenseMap.h:174
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition Function.h:879
Constant * getPersonalityFn() const
Get the personality function associated with this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:328
size_t arg_size() const
Definition Function.h:875
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:663
const Argument * const_arg_iterator
Definition Function.h:74
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:723
Module * getParent()
Get the module that this global value is contained inside of...
bool usesWindowsCFI() const
Definition MCAsmInfo.h:674
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition MCDwarf.h:622
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition MCDwarf.h:736
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition MCDwarf.h:696
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:615
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:657
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition MCDwarf.h:716
OpType getOperation() const
Definition MCDwarf.h:804
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:630
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:727
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition MCDwarf.h:638
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition MCDwarf.h:721
const MCObjectFileInfo * getObjectFileInfo() const
Definition MCContext.h:413
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:411
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
virtual int64_t getDwarfRegNum(MCRegister Reg, bool isEH) const
Map a target register to an equivalent dwarf register number.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Metadata node.
Definition Metadata.h:1069
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MachineInstrBundleIterator< const MachineInstr > const_iterator
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
LLVM_ABI uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment, TargetStackID::Value StackID=TargetStackID::Default)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
LLVM_ABI int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
BasicBlockListType::iterator iterator
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getNumOperands() const
Returns the total number of operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition Module.cpp:301
WinX64EHUnwindMode getWinX64EHUnwindMode() const
Get how unwind information should be generated for x64 Windows.
Definition Module.cpp:960
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition Module.cpp:607
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
iterator end() const
Definition ArrayRef.h:339
iterator begin() const
Definition ArrayRef.h:338
A tuple of MDNodes.
Definition Metadata.h:1742
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
static constexpr size_t npos
Definition StringRef.h:58
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
TargetFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl=Align(1), bool StackReal=true)
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
const Triple & getTargetTriple() const
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
bool isUEFI() const
Tests whether the OS is UEFI.
Definition Triple.h:685
bool isOSWindows() const
Tests whether the OS is Windows.
Definition Triple.h:688
Value wrapper in the Metadata hierarchy.
Definition Metadata.h:459
Value * getValue() const
Definition Metadata.h:499
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
void spillFPBP(MachineFunction &MF) const override
If a function uses base pointer and the base pointer is clobbered by inline asm, RA doesn't detect th...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value i.e.
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int64_t mergeSPAdd(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int64_t AddOffset, bool doMergeWithPrevious) const
Equivalent to: mergeSPUpdates(MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; }...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as an epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value i.e. the one valid at the beginning of the function (before any stack operations).
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitPrologue/emitEpilogue - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
const X86TargetLowering * getTargetLowering() const override
bool isTargetWindowsCoreCLR() const
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition ARMWinEH.h:200
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
Return a MOVri opcode for materializing Imm into a 32- or 64-bit GPR.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
void stable_sort(R &&Range)
Definition STLExtras.h:2116
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ DwarfCFI
DWARF-like instruction based exceptions.
Definition CodeGen.h:55
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew mod Align.
Definition MathExtras.h:546
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
static bool isFuncletReturnInstr(const MachineInstr &MI)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition LEB128.h:24
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
LLVM_ABI void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition LEB128.h:79
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, Register Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
constexpr RegState getUndefRegState(bool B)
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
union llvm::TargetFrameLowering::DwarfFrameBase::@004076321055032247336074224075335064105264310375 Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
SmallVector< WinEHHandlerType, 1 > HandlerArray